Definition 1580 

def mul(size,h,vs,v,i,ws,w,j) as

    mul ← ((vs & v_{size-1+i})^{h-size} || v_{size-1+i..i}) * ((ws & w_{size-1+j})^{h-size} || w_{size-1+j..j})
enddef

def WideMultiplyMatrixExtract(op,ra,rb,rc,rd) 
    d ← RegRead(rd, 128)
    c ← RegRead(rc, 64)
    b ← RegRead(rb, 128)
    case b_{8..0} of
        0..255:
            sgsize ← 128
        256..383:
            sgsize ← 64
        384..447:
            sgsize ← 32
        448..479:
            sgsize ← 16
        480..495:
            sgsize ← 8
        496..503:
            sgsize ← 4
        504..507:
            sgsize ← 2
        508..511:
            sgsize ← 1
    endcase
    l ← b_{11}
    m ← b_{12}
    n ← b_{13}
    signed ← b_{14}
    if c_{3..0} ≠ 0 then
        wsize ← (c and (0-c)) || 0^4
        t ← c and (c-1)
    else
        wsize ← 128
        t ← c
    endif

if sgsize < 8 then 

gsize-*-8 
elseif sgsize > wsize/2 then 

gsize-^-wsize/2 

else 
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gsize^-sgsize 
endif 

    lgsize ← log(gsize)
    lwsize ← log(wsize)
    if t_{lwsize+6-n-lgsize..lwsize-3} ≠ 0 then
        msize ← (t and (0-t)) || 0^4
        VirtAddr ← t and (t-1)
    else
        msize ← 64*(2-n)*wsize/gsize
        VirtAddr ← t
    endif
    vsize ← (1+n)*msize*gsize/wsize
    mm ← LoadMemory(c,VirtAddr,msize,order)
    lmsize ← log(msize)
    if (VirtAddr_{lmsize-4..0} ≠ 0) then
        raise AccessDisallowedByVirtualAddress
    endif

    case op of
        W.MUL.MAT.X.B:
            order ← B
        W.MUL.MAT.X.L:
            order ← L
    endcase
    ms ← signed
    ds ← signed ^ m
    as ← signed or m
    spos ← (b_{8..0}) and (2*gsize-1)
    dpos ← (0 || b_{23..16}) and (gsize-1)
    r ← spos
    sfsize ← (0 || b_{31..24}) and (gsize-1)
    tfsize ← (sfsize = 0) or ((sfsize+dpos) > gsize) ? gsize-dpos : sfsize
    fsize ← (tfsize + spos > h) ? h - spos : tfsize
    if (b_{10..9} = Z) & ~signed then
        rnd ← F
    else
        rnd ← b_{10..9}
    endif
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.1580 



for i -*-0 to wsize-gsize by gsize 
q[0] ()2*gsize+7-igsize 

for j 0 to vsize-gsize by gsize 
if n then 

if (~) & j & gsize = 0 then 

k-*- i-(j&gsize)+wsize*j 8Jgsize+1 
q[i+gsize]-#- q[i] + muKgsize.h.rr^mmXds.dj) 

else 

k i+gsize+wsize*j 8Jgsize+1 
q[i+gsize]-«- q[i] - muKgsize.h.ms.mm.k.ds.d.j) 

endif 

else 

q[i+gsize]-*-q[i] = mul(gsize,h,ms,mm,i+j*wsize/gsize l ds,d l j) 

endif 
endfor 
P^q[128] 
case rnd of 
none, N: 

s^O h - r |hp r ||pr-i 



s 

s- 



0 h 

■ 0h-r||ir 



endcase 

v^-((ds4ph-l)||p) + (0|| s) 



* (Vh..r+fsize= (as & v r ^ fsize .i ) h * 1 * r fsJze ) or not I then 

w -*-(as & v r+fsize .i)9si2e-fsize.dpos|| Vfsj2e _ 1+r r || 0 dpos 

else 

W-«-(s ? (v h ||-v9 size * d P° s " 1 ) : l9size-dposj ||Qdpos 

endif 

w 



a$ize-l+Li 
endfor 

3l27..wsize^*"0 
RegWrite(ra, 128, a) 
enddef 
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Exceptions 



Access disallowed by virtual address 
Access disallowed by tag 
Access disallowed by global TB 
Access disallowed by local TB 
Access detail required by tag 
Access detail required by local TB 
Access detail required by global TB 
Local TB miss 
Global TB miss 
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Operation codes 



1610 



W.MUL.MAT.X.I.8.B 


Wide multiply matrix extract immediate signed byte big-endian 


W.MUL.MAT.X.I.8.L 


Wide multiply matrix extract immediate signed byte little-endian 


W.MUL.MAT.X.I.16.B


Wide multiply matrix extract immediate signed doublet big-endian 


W.MUL.MAT.X.I.16.L


Wide multiply matrix extract immediate signed doublet little-endian 


W.MUL.MAT.X.I.32.B


Wide multiply matrix extract immediate signed quadlet big-endian 


W.MUL.MAT.X.I.32.L 


Wide multiply matrix extract immediate signed quadlet little-endian 


W.MUL.MAT.X.I.64.B


Wide multiply matrix extract immediate signed octlets big-endian 


W.MUL.MAT.X.I.64.L 


Wide multiply matrix extract immediate signed octlets little-endian 


W.MUL.MAT.X.I.C.8.B 


Wide multiply matrix extract immediate complex bytes big-endian 


W.MUL.MAT.X.I.C.8.L


Wide multiply matrix extract immediate complex bytes little-endian 


W.MUL.MAT.X.I.C.16.B 


Wide multiply matrix extract immediate complex doublets big-endian 


W.MUL.MAT.X.I.C.16.L


Wide multiply matrix extract immediate complex doublets little-endian 


W.MUL.MAT.X.I.C.32.B


Wide multiply matrix extract immediate complex quadlets big-endian 


W.MUL.MAT.X.I.C.32.L


Wide multiply matrix extract immediate complex quadlets little-endian 



Selection 



class 


op 


type 


size 


order 


wide multiply 
extract immediate 


W.MUL.MAT.X.I 


NONE 


8 16 32 64 


LB 


C 


8 16 32 


LB 



Format 

W.op.tsize.order rd=rc,rb, i 
rd=woptsizeorder(rc,rb,i) 
31 24 23 



18 17 



12 11 



6 5 4 32 
HE 



W.op.order 



rd 



rc 



rb 



t 



sh 



8 



1 2 



sz ← log(size) - 3
assert size+3 > i > size-4
sh ← i - size
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1630 



1023 m[rc](128*128/size) 



C 



Xextract/ 



\extract/ , , \extraoi/ , , \extract/ , ^extract/ , r 



Xextrac^ / 



i r 



Xextrac^ / 



127 



rd(128) 



Xextract/ 



128 rd(128) 0 

Wide multiply matrix extract immediate doublets 
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1660 



rc(64*128/size) 




rb(128) 



\extracy 

i ■ 



T 



\extract/ , . Vxtrac^ , r \extrac{/ ' 1 XextracV ^, p 



Xextrac^ 



\extracy / 

i 



i 



\extracy / 



128 rd(128) 0 

Wide multiply matrix extract immediate complex doublets 
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def mul(size,h,vs,v,i,ws,w,j) as 

    mul ← ((vs & v_{size-1+i})^{h-size} || v_{size-1+i..i}) * ((ws & w_{size-1+j})^{h-size} || w_{size-1+j..j})
enddef 

def WideMultiplyMatrixExtractImmediate(op,type,gsize,rd,rc,rb,sh)
    c ← RegRead(rc, 64)
    b ← RegRead(rb, 128)
    lgsize ← log(gsize)
    case type of
        NONE:
            if c_{lgsize-4..0} ≠ 0 then
                raise AccessDisallowedByVirtualAddress
            endif
            if c_{3..lgsize-3} ≠ 0 then
                wsize ← (c and (0-c)) || 0^4
                t ← c and (c-1)
            else
                wsize ← 128
                t ← c
            endif
            lwsize ← log(wsize)
            if t_{lwsize+6-lgsize..lwsize-3} ≠ 0 then
                msize ← (t and (0-t)) || 0^4
                VirtAddr ← t and (t-1)
            else
                msize ← 128*wsize/gsize
                VirtAddr ← t

        C:
            if c_{lgsize-4..0} ≠ 0 then
                raise AccessDisallowedByVirtualAddress
            endif
            if c_{3..lgsize-3} ≠ 0 then
                wsize ← (c and (0-c)) || 0^4
                t ← c and (c-1)
            else
                wsize ← 128
                t ← c
            endif
            lwsize ← log(wsize)
            if t_{lwsize+5-lgsize..lwsize-3} ≠ 0 then
                msize ← (t and (0-t)) || 0^4
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VirtAddr-*- t and (t-1) ^-—1680 

            else
                msize ← 64*wsize/gsize
                VirtAddr ← t
            endif
            vsize ← 2*msize*gsize/wsize
    endcase
    case op of
        W.MUL.MAT.X.I.B:
            order ← B
        W.MUL.MAT.X.I.L:
            order ← L
    endcase
    as ← ms ← bs ← 1
    m ← LoadMemory(c,VirtAddr,msize,order)
    h ← (2*gsize) + 7 - lgsize - (ms and bs)
r gsize + (sh|||sh) 
for-*-0 to wsize-gsize by gsize 
q[0] 02*gsize+7-lgsize 

for 0 to vsize-gsize by gsize 
case type of 
NONE: 

q[j+gsize] -*-q[i] + mulfgsize.h.ms.m.i+wsize* 
J8..lgsize,bs,b,j ) 

if (H) & j & gsize = 0 then 

k-*-i-a&gsize) + wsize*j 8Jgsize+1 
qu+gsizel^qfij + mulfgsize.h.ms.m.k.bs.bj) 

else 

k-^-i + gsize +W size*j 8Jgsize+1 
endif " mul(gsize ' h>ms ' m ' k ' bs ' b 'i) 

endcase 
endfor 

p-*-q[vsize] 
S^0h-r||~ Pr J| pr-1 

v^((as&p M )||p) +( o||s) 
" ( v h..r+gsize = (as & v r+gs j ze _.| )h+1-r-gsize then 
agsize-1+i..i Vg S ize-1+r..r 

else 

agsize-Ui..i-*- as ? (v h ||~vf ize - 1 ) : i gs ize 

endif 
endfor 

a 127..wsize 0 

RegWrite(rd, 128, a) 
ddef FIG. 16D-2 



Exceptions 



Access disallowed by virtual address 
Access disallowed by tag 
Access disallowed by global TB 
Access disallowed by local TB 
Access detail required by tag 
Access detail required by local TB 
Access detail required by global TB 
Local TB miss 
Global TB miss 
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Operation codes 



W.MUL.MAT.C.F.16.B


Wide multiply matrix complex floating-point half big-endian 


W.MUL.MAT.C.F.16.L 


Wide multiply matrix complex floating-point half little-endian


W.MUL.MAT.C.F.32.B 


Wide multiply matrix complex floating-point single big-endian 


W.MUL.MAT.C.F.32.L 


Wide multiply matrix complex floating-point single little-endian 


W.MUL.MAT.F.16.B 


Wide multiply matrix floating-point half big-endian 


W.MUL.MAT.F.16.L 


Wide multiply matrix floating-point half little-endian 


W.MUL.MAT.F.32.B 


Wide multiply matrix floating-point single big-endian 


W.MUL.MAT.F.32.L 


Wide multiply matrix floating-point single little-endian 


W.MUL.MAT.F.64.B 


Wide multiply matrix floating-point double big-endian 


W.MUL.MAT.F.64.L 


Wide multiply matrix floating-point double little-endian 



Selection 



class 


op 


type 


prec 


order 


wide multiply matrix 


W.MUL.MAT 


F 


16 32 64 


L B 


C.F 


16 32 


L B 



Format 

W.op.prec.order rd=rc,rb
rd=wopprecorder(rc,rb) 
31 24 23 



18 17 



12 11 



65 



21 



W.op | pr~l 



W.MINOR.order 



rd 



rc 



rb 



8 



pr ← log(prec) - 3
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1730 



023 m[rc) 



128*128/size) 



















































































































































































































































































0 



127 



rb(128) 



128 rd(128) 0 

Wide multiply matrix floating-point half 
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Definition 

def mul(size,v,i,w,j) as

    mul ← fmul(F(size, v_{size-1+i..i}), F(size, w_{size-1+j..j}))
enddef



-1780 



def WideMultiplyMatrixFloatingPoint(major,op,gsize,rd,rc,rb)
    c ← RegRead(rc, 64)
    b ← RegRead(rb, 128)
    lgsize ← log(gsize)
    switch op of
        W.MUL.MAT.F.16, W.MUL.MAT.F.32, W.MUL.MAT.F.64:
            if c_{lgsize-4..0} ≠ 0 then
                raise AccessDisallowedByVirtualAddress
            endif
            if c_{3..lgsize-3} ≠ 0 then
                wsize ← (c and (0-c)) || 0^4
                t ← c and (c-1)
            else
                wsize ← 128
                t ← c
            endif
            lwsize ← log(wsize)
            if t_{lwsize+6-lgsize..lwsize-3} ≠ 0 then
                msize ← (t and (0-t)) || 0^4
                VirtAddr ← t and (t-1)
            else
                msize ← 128*wsize/gsize
                VirtAddr ← t
            endif
            vsize ← msize*gsize/wsize
        W.MUL.MAT.C.F.16, W.MUL.MAT.C.F.32, W.MUL.MAT.C.F.64:
            if c_{lgsize-4..0} ≠ 0 then
                raise AccessDisallowedByVirtualAddress
            endif
            if c_{3..lgsize-3} ≠ 0 then
                wsize ← (c and (0-c)) || 0^4
                t ← c and (c-1)
            else
                wsize ← 128
                t ← c
            endif
            lwsize ← log(wsize)
            if t_{lwsize+5-lgsize..lwsize-3} ≠ 0 then
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VirtAddr-«-t and (t-1) 



            else
                msize ← 64*wsize/gsize
                VirtAddr ← t
            endif
            vsize ← 2*msize*gsize/wsize
    endcase
    case major of
        M.MINOR.B:
            order ← B
        M.MINOR.L:
            order ← L
    endcase
    m ← LoadMemory(c,VirtAddr,msize,order)
for i-*-0 to wsize-gsize by gsize 
q[0].t-*-NULL 

for j 0 to vsize-gsize by gsize 
case op of 

W.MULMAT.F.16, W.MUL.MAT.F.32, W.MUL.MAT.F.64: 
q(j+gsize]-*-faddq[j], mul(gsize,m,i+wsize* 

i&.lgsize+l ' b -j)) 
W.MUL.MAT.C.F.16, W.MUL.MAT.C.F.32, 
W.MUL.MAT.C.F.64: 

if (~i) & j & gsize = 0 then 

k-*-i-(j&gsize)+wsize*j 8 , jze+1 
qfj+gsize]-«— faqq[j], muKgsize.m.k.b.j)) 

else 

k-«- i+gsize+wsize*j 8 ..igsi Ze+ i 
qfj+gsize] -«-fsubq[j], mul(gsize,m,k,b,i)) 
endif 

endcase 
endfor 

agsize-1+i..i-*- q[vsize] 
endfor 

3l27..wsize"*— 0 

RegWrite(rd, 128, a) 
enddef 
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Exceptions 



Floating-point arithmetic 
Access disallowed by virtual address 
Access disallowed by tag 
Access disallowed by global TB 
Access disallowed by local TB 
Access detail required by tag 
Access detail required by local TB 
Access detail required by global TB 
Local TB miss 
Global TB miss 
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Operation codes 



W.MUL.MAT.G.8.B 


Wide multiply matrix Galois bytes big-endian 


W.MUL.MAT.G.8.L 


Wide multiply matrix Galois bytes little-endian 



Selection 



class 


op 


size 


order 


Multiply matrix Galois 


W.MUL.MAT.G 


8 


B L 



Format 

W.op.order ra=rc,rd,rb

ra=woporder(rc,rd,rb)

31         24 23  18 17  12 11   6 5    0
| W.op.order |  rd  |  rc  |  rb  |  ra  |
      8         6      6      6      6
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1830 



2047 



m(rc] 



\module/ ^ 



\module/ 



i r 



r \module/ , 



\module/ 



i r 



128*128/size) 



F \module/ 1 \moc 



\module/ 



Xmodule/ 



128 



i r 



ule/, ^module/ ^ ^module/ , \module/ , f 



\module/ 



127 



\module/ 



E 



rd(128) 



ra(128) 

Wide multiply matrix Galois byte 
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Definition ^1860 
def c ← PolyMultiply(size,a,b) as
    p[0] ← 0^{2*size}
    for k ← 0 to size-1
        p[k+1] ← p[k] ^ a_k ? (0^{size-k} || b || 0^k) : 0^{2*size}
    endfor
    c ← p[size]
enddef

def c-*-PolyResidue(size,a,b) as 
P[0] a 

for k-«- size- 1 to 0 by-1 

p[k-1]^-p[k] * p[0] size+k ?(0 size -k|| b || 0 k ) : 0 2 * size 

c^pfsizeJsizg.i.o 
enddef 

def WideMultiplyMatrixGalois(op,gsize,rd,rc,rb,ra)
    d ← RegRead(rd, 128)
    c ← RegRead(rc, 64)
    b ← RegRead(rb, 128)
    lgsize ← log(gsize)
    if c_{lgsize-4..0} ≠ 0 then
        raise AccessDisallowedByVirtualAddress
    endif
    if c_{3..lgsize-3} ≠ 0 then
        wsize ← (c and (0-c)) || 0^4
        t ← c and (c-1)
    else
        wsize ← 128
        t ← c
    endif
    lwsize ← log(wsize)
    if t_{lwsize+6-lgsize..lwsize-3} ≠ 0 then
        msize ← (t and (0-t)) || 0^4
        VirtAddr ← t and (t-1)
    else
        msize ← 128*wsize/gsize
        VirtAddr ← t
    endif
    case op of
        W.MUL.MAT.G.8.B:
            order ← B
        W.MUL.MAT.G.8.L:
            order ← L
    endcase



r 



1860 



m-*-LoadMemory(c, VirtAddr.msize.order) 

for i-*-0 wsize-gsize by gsize 
qlOl-^-O^gsize 

for j-*— 0 to vsize-gsize by gsize 
' + wsize*j 8Jgsi2e 

q[j + gsize]^-q[j] * PolyMultiply(gsize f m k ^ a . 1 .. k .d^s^ j ) 

agsize-i+i..i-*-PolyResidue(gsize,q[vsize],bgsize-i o ) 
endfor ' 

a 127..wsize~*— 0 
RegWrite(ra,128, a) 
enddef 
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Exceptions 



Access disallowed by virtual address 
Access disallowed by tag 
Access disallowed by global TB 
Access disallowed by local TB 
Access detail required by tag 
Access detail required by local TB 
Access detail required by global TB 
Local TB miss 
Global TB miss 
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Operation codes 



E.MUL.ADD.X 


Ensemble multiply add extract 


E.CON.X 


Ensemble convolve extract 


Format 




E.op rd@rc,rb,ra 




rd=gop(rd,rc,rb,ra) 




31 24 


23 18 17 12 11 6 5 0 


I E.op | 


rd I rc I rb | ra I 


8 


6 6 6 6 



FIG. 19 A 
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Figures 19B and 20B has blank fields: should be. 



fsize 



I 



dpos 



x s n 



m 



PTrnd 



gssp 



FIG. 19B 



127 



rc(128) 



\extract/ i ^extrac^ , \extrac^ \8xtract/ 1 , 



k \extract/ i \extract/ i Extract/ ' \extracy 



I 



I 



I 



127 



rb(128) 



I 



128 rd(128) 0 

Ensemble multiply add extract doublets 
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1945 



127 




r 




rc(128) 



I 




\extract/|^tri37 , A pxtrac^ , \extract/ , , 
' \extra"c{7 ^ s+r^r^krr-7± 



Xextract/ \extract/ 



I 



I 




I 



127 



rb(128) 



\extract / 



I 



128 rd(128) o 

Ensemble complex multiply add extract doublets 

The ensemble-multiply-add-extract instructions (E.MUL.ADD.X), when
the x bit is set, multiply the low-order 64 bits of each of the rc and rb
registers and produce extended (double-size) results.
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Ensemble convolve extract doublets 



FIG. 19E 



».*— 



<72 



1975 



255 



rc II rd (256) 




\extract / 



I T 



\extracy Xextract / \extract / 



i r 



128 



rd(128) 0 
Ensemble convolve extract complex doublets 
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Definition 

def muKsize.h.vs.v.i.ws.wJ) as 

muh«- ((vs&v S i Z e.i + i)h-si2e||v size .i + i..O 
enddef 



.1990 



((ws&w size-1 4 h-size || w size -i ^..j) 



def EnsembleExtractInplace(op,ra,rb,rc,rd) as
    d ← RegRead(rd, 128)
    c ← RegRead(rc, 128)
    b ← RegRead(rb, 128)
    case b_{8..0} of
        0..255:
            sgsize ← 128
        256..383:
            sgsize ← 64
        384..447:
            sgsize ← 32
        448..479:
            sgsize ← 16
        480..495:
            sgsize ← 8
        496..503:
            sgsize ← 4
        504..507:
            sgsize ← 2
        508..511:
            sgsize ← 1
    endcase

l-*-an 

m-*-ai2 

n-^-ai3 

signed-*-ai4 

x-«-ai5 

    case op of
        E.CON.X:
            if (sgsize < 8) then
                gsize ← 8
            elseif (sgsize*(n+1)*(x+1) > 128) then
                gsize ← 128/(n+1)/(x+1)
            else
                gsize ← sgsize
            endif
            lgsize ← log(gsize)
            wsize ← 128/(x+1)
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ds-«-cs-«- signed 

bs-«- signed A m 

zs signed or m or n 

zsize -*-gsize*(x+1) 

h-«- (2*gsize) + log(vsize) - Igsize 

spos-«- (aa.,o) and (2*gsize-1) 



        E.MUL.ADD.X:
            if (sgsize < 8) then
                gsize ← 8
            elseif (sgsize*(n+1)*(x+1) > 128) then
                gsize ← 128/(n+1)/(x+1)
            else
                gsize ← sgsize
            endif
            ds ← signed
            cs ← signed ^ m
            zs ← signed or m or n
            zsize ← gsize*(x+1)
            h ← (2*gsize) + n
            spos ← (a_{8..0}) and (2*gsize-1)
    endcase

dpos-«-(0|| ^23.as) and (zsize-1) 
r-«-spos 

sfsize -*-(0|| 83-1.24) and (zsize-1) 

tfsize-*- (sfsize = 0) or ((sfsize+dpos) > zsize) ? zsize-dpos : sfsize 
fsize (tfsize + spos > h) ? h - spos : tfsize 
if (bio 9 = Z) and not as then 
rnd-*-F 

else 

rnd^- bio,.9 
endif 
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.1990 



for k 0 to wsize-zsize by zsize 
i-«-k*gsize/zsize 
case op of 
E.CON.X: 
q[0]^-0 

for j-*- 0 to vsize-gsize by gsize 
if n then 

if(~) & j & gsize = 0 then 

q[j+gsize]-*- qfj] + mul(gsize,h,ms,m I i+ 
128-j,bs,b,j) 

else 

qU'+gsize]-*-q[j] - mul(gsize,h,ms,i+ 
128-j+2*gsize ( bs,bJ) 
endif 

else 

qfj+gsize] -^q[j] + mul(gsize,h ,ms,m,i+ 
128-j.bs.bj) 

endif 
endfor 

p-*-q[vsize] 
E.MUL.ADD.X: 

di -*-((ds and dk+zize-1 )h-zsize-r|| (d k +zsize-1..k )|| 0 r ) 
if n then 

if ( i and gsize) = 0 then 

p muKgsize.h.ds.d.i.cs.c.i)- 
mul(gsize l h,ds,d,i+gsize,cs,c,i+gsize)+di 

else 



p^muKgsize.h.ds.dJ.cs.cj-^sizeJ-miuKgsize.h.ds.d.i.cs.c.i-^sizeJ+di 

endif 

else 

p-*- mul(gsize,h,ds,d,i,cs,c,i) + di 

endif 

endcase 
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case rnd of 
N: 

z s-0Ml~p r ||pM 

S^OMI pr 

p. r n-i 

s-*-0 h 

C: 

s ^_ 0 h-r|| ir 

endcase 

v 7-« zs& Ph-i)|| P) * (0|| s) 

EXlWcf^hen & Vr+fsi - 1 > h+1 - r - fsi2e ) « not (I and (op = 

else W "~ (2S & Vr+,size - l)ZSi2e - ,size - dpos llvfsize-i + r.,ll O^Pos 
endjf W ^" (2S ^Vhlhv^ize-dpos-l) . izsize-dpos^iodpos 



Zzsize-1_k..k-*- w 
endfor 

RegWrite(rd, 128, z) 
enddef 



FIG. 19G-4 





Format 

E.op ra=rd,rc,rb 
ra=eop(rd,rc,rb) 
31 



24 23 



18 17 




FIG. 20A 




FIG. 20B 



^-2030 




rc(128) 



128 ra(12ty 0 

Ensemble complex multiply extract doublets 

The ensemble-multiply-extract instructions (E.MUL.X), when
the x bit is set, multiply the low-order 64 bits of each of the rd and rc
registers and produce extended (double-size) results.



FIG. 20D 




FIG. 20C 



■2040 



127 



127 



rd(128) 



Willi 



rc(128) 



128 



\extrac 



N ^/ 1 - \extrW \extrtct/ 



SextracL 

5 



\extrac 




n r 



95 
80 

0 rb(128) 



79 
64 



yextrac 




ra(128) 0 
Ensemble scale add extract doublets 



FIG. 20E 



2050 



127 rd(128) g 




Ensemble complex scale add extract doublets 

The ensemble-scale-add-extract instructions (E.SCAL.ADD.X), when the x bit
is set, multiply the low-order 64 bits of each of the rd and rc registers by the
rb register fields and produce extended (double-size) results.



FIG. 20F 




FIG. 20G 



fsize 



st 



spoc 



rd 



gsize> 



2070 



5" 



rc 



ra 



fsize 



r . dpos 



Ensemble merge extract 



FIG. 20H 



— \- 


gsizex 




^_ \ \ — 


I s 


I a I 


0 ]ra 




_ fsize 
— » < 


dpos 



Ensemble expand extract 



FIG. 201 



Definition 

det muKsize.h.vs.v.i.ws.wJ) as 

muh*- ((vs&v 8iZ e. 1+ j)h.8ize||v si2e . 1 . 
enddef 



-2090 



.i) * ((ws&w S ize-H)h-size|| w , 



size-1 ) 



def EnsembleExtract(op,ra,rb,rc,rd) as 
d-*-RegRead(rd, 128) 
c~«-RegRead(rc, 128) 
b-*-RegRead(rb, 128) 
case b8..o of 
0..255: 

sgsize-*-128 
256..383: 

sgsize-*-64 
384. .447: 

sgsize-*-32 
448. .479: 

sgsize-*-16 
480. .495: 

sgsize-*-8 
496. .503: 

sgsize-*-4 
504.. 507: 

sgsize-«-2 
508. .511: 

sgsize-«-1 

endcase 

m-*-bi2 

signed-*- bi4 

case op of 

E. EXTRACT: 

gsize sgsize*2(2-(m or x)) 
zsize-*- sgsize 
h-*- gsize 
as-*- signed 

spos^-(b8..o) and (gsize-1) 



FIG. 20J-1 



--2090 

E.SCAL.ADD.X: 

if (sgsize < 8) then 

gsize-*- 8 
elseif (sgsize*(n+1) > 32) then 

gsize-*-32/(n+1) 

else 

gsize-«~ sgsize 
endif 

ds-«- cs-«- signed 
bs-*- signed A m 
as-«- signed or m or n 
zsize gsize*(x+1) 
h -«-(2*gsize) + 1 + n 
spos-«-(b8 o) and (2*gsize-1) 
E.MUL.X: 

if (sgsize < 8} then 

gsize-«- 8 
elseif (sgsize*(n+1)*(x+1) > 128) then 

gsize-*-128/(n+1)/(x+1) 

else 

gsize-^- sgsize 

endif 

ds -4- signed 

cs-«- signed A m 

as signed or m or n 

zsize -«-gsize*(x+1) 

h (2*gsize) + n 

spos-«-(b8..o) and (2*gsize-1) 

endcase 

dpos-*- (0|| b23..i6) an( l (zsize-1) 
r-«-spos 

sfsize -*-(0|| b3i.,24) and (zsize-1) 

tfsize (sfsize =0) or ((sfsize+dpos) > zsize) ? zsize-dpos : sfsize 
fsize (tfsize + spos > h) ? h - spos : tfsize 
if (bio 9=Z) and not as then 
rnd-*-F 

else 

rnd-«- b 

endif 



FIG. 20J-2 



for j-^- 0 to 128-zsize by zsize 2090 
i-*- j*gsize/zsize 
case op of 

E. EXTRACT: 
if m or x then 

p-<- dgsize+i-1..i 

else 

P"*~ c)gsize-H-1..i 

endif 
E.MULX: 
if n then 

if (i and gsize) = 0 then 

p mul(gsize,h,ds,d,i,cs,c 
mul(gsize,h,ds,dj+gsize,cs,c,i+gsize) 

else 

muKgsize.h.ds^j.cs.cj+gsizei+muKgsize^^s^J.cs^j+gsize) 

endif 

else 

p mul(gsize,h ,ds t d t i ( cs,c,i) 
endif 
E.SCAL.ADD.X: 
if n then 

if (i and gsize) = 0 then 

p-*-mul(gsize l h l ds ) d,i 1 bs t b,64+2*gsize) 
+ mul(gsize,h,cs,c,i,bs t b,64) 

- mul(gsize,h,ds,d j+gsize,bs,b t 64+3*g$ize) 

- mul(gsize,h,cs,c/i+gsize,bs,b,64-«^size) 

else 

p muHgsize.h.ds.d j l bs,b l 64+3*gsize) 
+ mul(gsize,h,cs,c I i,b$ I b,64+gsize) 
+ mul(gsize # h ) ds t d l i+gsize,bs,b,64+2*gsize) 
+ mul(gsize I h,cs I c t i+gsize t bs ( b f 64) 

endif 

else 

p^- mul(gsize,h,ds ) d t i,bs t b ) 64+gsize) + mul(gsize 
l h,cs l c ( i ( bs I b f 64) 

endif 

endcase 



FIG. 20J-3 



enddef 



case rnd of 

N: ^--2090 
z s^0^||~ Pr || p M 

p. ^^H^ 

C: 

s ^_()h-r|| Y 

endcase 

v^-((as&p M )||p) +{ o|| s) 

else W "" & Vf+fsize=l)2SiZe - fsize - dpos llvfsize-i + r.,IIOdPos 
endif W ~*~ (S ? (Vh " ^ siZ9 " dpos " 1 ) : l 2 *"-^*) |j 0 d POS 
if m and (op = E. EXTRACT) then 

else c dpos-H..j " 

endif 
endfor 

RegWrite(ra, 128, z) 



FIG. 20J-4 




FIG. 21 A 
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Typical dynamic-linked, inter-gateway calling sequence: 
caller: 



caller AA.DDI 


sp@-size 


// allocate caller stack frame 


S.I.64.A 


Ip.sp.off 




S.I.64.A 


dp.sp.off 




... 

L.I.64.A 


!p=dp,off 


// load Ip 


LI.64.A 


dp=dp,off 


// load dp 


B.GATE 






L.I. 64. A 


dD SD Off 




...(code using dp) 






L.I.64.A 


lp=sp,off 


// restore original Ip register 


A.ADDI 


sp=size 


// deallocate caller stack fram< 


B 


IP 


// return 


callee (non-leaf): 






calee: L.I.64.A 


dp=dp,off 


// load dp with data pointer 


S.I.64.A 


sp,dp,off 




L.I.64.A 


sp=dp t off 


// new stack pointer 


S.I.64.A 


lp,sp,off 




S.I.64.A 


dp.sp.off 




...(using dp) 






LI.64.A 


dp,sp,off 




...(code using dp) 






LI.64.A 


lp=sp t off 


// restore original Ip register 


LI.64.A 


sp=sp,off 


// restore original sp register 


B.DOWN 


Ip 




callee (leak, no stack): 






callee: ...(using dp) 






B.DOWN 


ip 





FIG. 21 B 
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Operation end** 

B GATE I Branch gatewa y 



Equivaienripg 




Format 

B.GATE rb 
bgate(rb) 




FIG. 21 C 
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data 



Branch gateway 



FIG. 21 D 



Definition 

def BranchGateway(rd,rc,rb) as
    c ← RegRead(rc, 64)
    b ← RegRead(rb, 64)
    if (rd ≠ 0) or (rc ≠ 1) then
        raise ReservedInstruction
    endif
    if c_{2..0} ≠ 0 then
        raise AccessDisallowedByVirtualAddress
    endif
    d ← ProgramCounter_{63..2}+1 || PrivilegeLevel
    if PrivilegeLevel < b_{1..0} then
        m ← LoadMemoryG(c,c,64,L)
        if b ≠ m then
            raise GatewayDisallowed
        endif
        PrivilegeLevel ← b_{1..0}
    endif
    ProgramCounter ← b_{63..2} || 0^2
    RegWrite(rd, 64, d)
    raise TakenBranch
enddef



FIG. 21E 



Exceptions 



Reserved Instruction 
Gateway disallowed 
Access disallowed by virtual address 
Access disallowed by tag 
Access disallowed by global TB 
Access disallowed by local TB 
Access detail required by tag 
Access detail required by local TB 
Access detail required by global TB 
Local TB miss 
Global TB miss 



FIG. 21 F 
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Operation codes 



E.SCALADD F 1R 



E^CALADOF.32 



E.SCALADD F ftd 
Selection 




class 



1 scale add 







prec " [ 




fc.SCAL.ADDF 


_16 32 64 H 



Format 

E.op.prec ra=rd,rc,rb 

ra=eopprec(rd t rc,rb) 

31 __ 24 23 

E.op.prec 

8 



18 17 



12 11 



rd 

6 



6 5 



rc 

6 



rb 

6 



ra 

6 




2230 



Definition 

def EnsembleFloatingPointTernary(op,prec,rd,rc,rb,ra) as
    d ← RegRead(rd, 128)
    c ← RegRead(rc, 128)
    b ← RegRead(rb, 128)
    for i ← 0 to 128-prec by prec
        di ← F(prec, d_{i+prec-1..i})
        ci ← F(prec, c_{i+prec-1..i})
        ai ← fadd(fmul(di, F(prec, b_{prec-1..0})), fmul(ci, F(prec, b_{2*prec-1..prec})))
        a_{i+prec-1..i} ← PackF(prec, ai, none)
    endfor
    RegWrite(ra, 128, a)
enddef



FIG. 22B 
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Operation codes 
I G. BOOLEAN 




Selection 




Format 

G. BOOLEAN rd@trc,trb,f 
rd=gbooleani(rd ( rc,rb,f) 



31 



25 2423 



G. BOOLEAN 



18 17 



ih 



12 11 



rd 



6 5 



rc 

6 



rb 



FIG. 23A 



if f_6 = f_5 then
    if f_2 = f_1 then
        if f_2 then
            rc ← max(trc,trb)
            rb ← min(trc,trb)
        else
            rc ← min(trc,trb)
            rb ← max(trc,trb)
        endif
        ih ← 0
        il ← 0 || f_6 || f_7 || f_4 || f_3 || f_0
    else
        if f_2 then
            rc ← trb
            rb ← trc
        else
            rc ← trc
            rb ← trb
        endif
        ih ← 0
        il ← 1 || f_6 || f_7 || f_4 || f_3 || f_0
    endif
else
    ih ← 1
    if f_6 then
        rc ← trb
        rb ← trc
        il ← f_1 || f_2 || f_7 || f_4 || f_3 || f_0
    else
        rc ← trc
        rb ← trb
        il ← f_2 || f_1 || f_7 || f_4 || f_3 || f_0
    endif
endif

endif 



FIG. 23B 



Definition 

def GroupBoolean(ih,rd,rc,rb,il)
    d ← RegRead(rd, 128)
    c ← RegRead(rc, 128)
    b ← RegRead(rb, 128)
    if ih = 0 then
        if il_5 = 0 then
            f ← il_3 || il_4 || il_4 || il_2 || il_1 || (rc>rb)^2 || il_0
        else
            f ← il_3 || il_4 || il_4 || il_2 || il_1 || 0 || 1 || il_0
        endif
    else
        f ← il_3 || 0 || 1 || il_2 || il_1 || il_5 || il_4 || il_0
    endif
    for i ← 0 to 127 by size
        a_i ← f_{(d_i || c_i || b_i)}
    endfor
    RegWrite(rd, 128, a)
enddef



FIG. 23C 




Format 

B.HINT badd,count,rd 
bhint(badd,count,rd) 




simm <— badd-pc-4 



FIG. 24A 
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Definition 

def BranchHint(rd,count,simm) as
    d ← RegRead(rd, 64)
    if (d_{1..0}) ≠ 0 then
        raise AccessDisallowedByVirtualAddress
    endif
    FetchHint(ProgramCounter + 4 + (0 || simm || 0^2), d_{63..2} || 0^2, count)
enddef



FIG. 24B 



Exceptions 

Access disallowed by virtual address 



FIG. 24C 
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Operation codes 



E.SINK.F.16 
E.SINK. F.16C 


FnSPmhlp mnuort f\r\nt 
uiigciiiuic iswlivcil WQcLl 

Ensfimhlp rnnv/Prt flnat 


ing-point 

inn nni 

ing-pomi 


doublets from half nearest default 
doublets from half ceiling 


E.SINK.F.16. CD 
E.SINK.F.16. F 


Ensemble convert float 
Ensemble convert float 


ing-point 
ing-point 


doublets from half ceiiinn default 
doublets from half floor 


E SINK F 16 F D 
E SINK F 16 N 

^ • W • 1 Will 


t.1 lociiiuic conven Tioai 


ing-point 


doublets from half floor default 


E.SINK.F.16.X 


(.Mdciiiuic conven Tioai 
Ensemble convert float 


ing-point 
ing-point 


doublets from half nearest 
doublets from half exact 


E.SINK. F.16.Z 
E.SINK.F.16.Z.D 


Ensemble convert float 
Ensemble convert float 


ing-point 
ing-point 


doublets from half zero 
doublets from half zero default 


E SINK F 32 

E.SINK.F.32.C 


i-iidcuiuie uonven Tioai 
Ensemble convert float 


ing-point 
ing-point 


quadlets from smole nearest default 
quadlets from sinqle ceilinq 


E.SINK.F.32.C.D 


t-iiociuuic uUMVcFl TIOcll 


ing-point 


quadlets from sinqle ceiling default 


E SINK F 32 F 


L.HOC1IIU1C vunvcrx Tioai 


ing-point 


quadlets from sinqle floor 


E SINK F 32 F D 


^Mdciiiuie oonven Tioai 


ing-point 


quadlets from sinqle floor default 


E.SINK.F.32.N 


F-f1<5Pmhlp rnnuort f lr» 


ing-point 


quadlets from sinqle nearest 


E.SINK F 32 X 
E SINK F 32 Z 


L_nodiiuic uunveii Tioai 


ing-point 


quadlets from sinqle exact 


E.SINK. F.32.2.D 


(-iiociML/ic i/Uiivcri Tioai 

L.HOCIMUIC vUMVCIl llUal 


ing-point 
ing-point 


quadlets from sinqle zero 
quadlets from sinqle zero default 


E.SINK.F.64 


Ensemble convert float 


ing-point 


octlets from double nearest default 


E.SINK.F.64.C 
E.SINK. F.64.C.D 


Ensemble convert float 


ing-point 


octlets from double ceiling 


E.SINK.F.64.F 
E.SINK.F.64.F.D 


uiiociiiuic uunvcri Tioai 
Ensemble convert float 


ing-point 
ing-point 
ing-point 


octlets from double ceilinq default 
octlets from double floor 
octlets from double floor default 


E.SINK.F.64.N 
E.SINK.F.64.X 


Ensemble convert float 
Ensemble convert float 


ing-point 
ing-point 


octlets from double nearest 

octlets from double exact 


E.SINK.F.64.Z 


Ensemble convert float 


ing-point 


octlets from double zero 


E.SINK. F.64.Z.D 


Ensemble convert float 


ing-point 


octlets from double zero default 


E.SINK.F.128 


Ensemble convert float 


ing-point 


hexlet from quad nearest default 


E.SINK.F.128.C 


Ensemble convert float 


ing-point 


hexlet from quad ceiling 


E.SINK.F.128.C.D 


Ensemble convert float 


ing-point 


hexlet from quad ceilinq default 


E.SINK.F.128.F 


Ensemble convert float 


ing-point 


hexlet from quad floor 


E.SINK.F.128.F.D 
E.SINK. F.128.N 


Ensemble convert floal 
Ensemble convert float 


ing-point 


hexlet from quad floor default 


E.SINK.F.128.X 


Ensemble convert float 


ing-point 
ing-point 


hexlet from quad nearest 

hexlet from quad exact 


E.SINK.F.128.Z 


Ensemble convert float 


ing-point 


hexlet from quad zero 


E.SINK.F.128.Z.D 


Ensemble convert float 


ing-point 


hexlet from quad zero default 



FIG. 25A-1 




Format 

E.SINK.F.prec.rnd rd=rc 
rd=esinkfprecrnd(rc) 




FIG. 25A-2 
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Definition 

def EnsembleSinkFloatingPoint(prec,round,rd,rc)
    c ← RegRead(rc, 128)
    for i ← 0 to 128-prec by prec
        ci ← F(prec, c_{i+prec-1..i})
        a_{i+prec-1..i} ← fsinkr(prec, ci, round)
    endfor
    RegWrite(rd, 128, a)
enddef



FIG. 256 



Exceptions 

Floating-point arithmetic 



FIG. 25C 



Urd 128 =rr>[rc] (128 + 64/size) * rb J28 
m[ rc ](128*64/size) 



511 




127 



rb(l28) 



128 rd(128) 0 



FIG. 2 



577 m [ re ]( 128*64/size) q 




FIG. 3 



□ specifier=address+(size/2)+( width/2) 



depth = 4 bytes



width = 16 bytes 



size = depth × width = 64 bytes



address is aligned to size (64 bytes),
so low-order 6 bits are zero



address 
size/2 



aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa 



000000 | 



000000000000000000000000000000000 | 10000Q | 



width/2 | OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOQ \ 001 QQQ \ 



specifier 
500 



aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa 



505 



101000 | 



FIG. 5 



510 



specifier I aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa I 101000 \ 
i _ _J 1 

600 S 605 615-^ 



610 



s and (0-s) 
T 



width/2 1 00000000000000000000000000 0000000 \ 001000 

, 



620 



625~^J~ onc j n0 \ (width/2) 



± 



aaaaaaaaaaaaaaaaaaaaaaaaaa aaaaaaaaaaa 



100000 



630 



635- 



t and (0-t) 



± 



size/2 I 000000000000000000000000000000000 1 100000 
— 1 



640 



645- 



± 



t and not (size/2) 



address aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa 
^ 



000000 



650 



FIG. 6 




700 
S 



Register number \ 



705 



Operand 
checker 




7J0A-H- 



725 



r 



Wide operand sp ecifier 



-710 



Memory 
Memory width- 



Register operand 



Register operand 



Portion 0 



Portion 1 



Portion 2 



Portion 3 



Portion 4 



Portion 5 



Portion 6 



t Portion 7 



zzzzzn 



Function 



Function unit with dedicated storage 



Result 



Register width 



715 



Y 



720A 



720n 



J 



714 



735 

/ 
Wide 
operand 

■740 



745 



FIG. 7 



□ wmc.c — contents

□ wmc.pa — physical address

□ wmc.size — size of contents

□ wmc.cv — contents valid

□ wmc.th — thread last used

□ wmc.reg — register last used

□ wmc.rtv — register & thread valid



FIG. 9 



Operation codes 



W.SWITCH.B 


Wide switch big-endian


W.SWITCH.L 


Wide switch little-endian 



Selection 



class 


op 


order 


Wide switch 


W.SWITCH 


B L 



Format 

W.op.order ra=rc,rd,rb
ra=woporder(rc,rd,rb)



31 24 23 18 17 12 11 6 5 0

| W.op.order | rd | rc | rb | ra |

8 6 6 6 6



FIG. 12A 




Wide Switch 



FIG. 12B 
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Definition 

def WideSwitch(op,rd,rc,rb,ra)
    d ← RegRead(rd, 128)
    c ← RegRead(rc, 64)
    b ← RegRead(rb, 128)
    if c_{1..0} ≠ 0 then
        raise AccessDisallowedByVirtualAddress
    elseif c_{6..0} ≠ 0 then
        VirtAddr ← c and (c-1)
        w ← wsize ← (c and (0-c)) || 0^{1}
    else
        VirtAddr ← c
        w ← wsize ← 128
    endif
    msize ← 8*wsize
    lwsize ← log(wsize)
    case op of
        W.SWITCH.B:
            order ← B
        W.SWITCH.L:
            order ← L
    endcase
    m ← LoadMemory(c,VirtAddr,msize,order)
    db ← d || b
    for i ← 0 to 127
        j ← 0 || i_{lwsize-1..0}
        l ← i_{7..lwsize} || j_{lwsize-1..0}
        a_{i} ← db_{l}
    endfor
    RegWrite(ra, 128, a)
enddef



FIG. 12C 



Exceptions 

Access disallowed by virtual address 
Access disallowed by tag 
Access disallowed by global TB 
Access disallowed by local TB 
Access detail required by tag 
Access detail required by local TB 
Access detail required by global TB 
Local TB miss

Global TB miss 



FIG. 12D 
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Operation codes 



W.TRANSLATE.8.B


Wide translate bytes big-endian


W.TRANSLATE.16.B


Wide translate doublets big-endian


W.TRANSLATE.32.B


Wide translate quadlets big-endian


W.TRANSLATE.64.B


Wide translate octlets big-endian


W.TRANSLATE.8.L


Wide translate bytes little-endian


W.TRANSLATE.16.L


Wide translate doublets little-endian


W.TRANSLATE.32.L


Wide translate quadlets little-endian


W.TRANSLATE.64.L


Wide translate octlets little-endian



Selection 



class 


size 


order 


Wide translate 


8 16 32 64 


B L 



Format 

W.TRANSLATE.size.order rd=rc,rb
rd=wtranslatesizeorder(rc,rb) 



31 



24 23



1817 



1211 



65 



21 0 



W.TRANSLATE.order 


rd 


rc 


rb 


0 


sz 


6 


6 


6 


6 


4 


2 



sz ← log(size) - 3



FIG. 13 A 



1 



> : > E '-'ox 




r 



1330 



vsize 



g size 

w size 



Wide translate: 16 entries by 64 bits 



FIG. 13B 



Definition 



1350 



def WideTranslate(op,gsize,rd,rc,rb)
    c ← RegRead(rc, 64)
    b ← RegRead(rb, 128)
    lgsize ← log(gsize)
    if c_{lgsize-4..0} ≠ 0 then
        raise AccessDisallowedByVirtualAddress
    endif
    if c_{4..lgsize-3} ≠ 0 then
        wsize ← (c and (0-c)) || 0^{3}
        t ← c and (c-1)
    else
        wsize ← 128
        t ← c
    endif
    lwsize ← log(wsize)
    if t_{lwsize+4..lwsize-2} ≠ 0 then
        msize ← (t and (0-t)) || 0^{4}
        VirtAddr ← t and (t-1)
    else
        msize ← 256*wsize
        VirtAddr ← t
    endif
    case op of
        W.TRANSLATE.B:
            order ← B
        W.TRANSLATE.L:
            order ← L
    endcase
    m ← LoadMemory(c,VirtAddr,msize,order)
    vsize ← msize/wsize
    lvsize ← log(vsize)
    for i ← 0 to 128-gsize by gsize
        j ← (((order=B)^{lvsize}) xor (b_{lvsize-1+i..i}))*wsize + i_{lwsize-1..0}
        a_{gsize-1+i..i} ← m_{j+gsize-1..j}
    endfor
    RegWrite(rd, 128, a)
enddef



FIG. 13C 



Exceptions 



Access disallowed by virtual address 
Access disallowed by tag 
Access disallowed by global TB 
Access disallowed by local TB 
Access detail required by tag 
Access detail required by local TB 
Access detail required by global TB 
Local TB miss 
Global TB miss 



FIG. 13D 



Operation codes 



1410 



W.MUL.MAT.8.B


Wide multiply matrix signed byte big-endian


W.MUL.MAT.8.L


Wide multiply matrix signed byte little-endian


W.MUL.MAT.16.B


Wide multiply matrix signed doublet big-endian


W.MUL.MAT.16.L


Wide multiply matrix signed doublet little-endian


W.MUL.MAT.32.B


Wide multiply matrix signed quadlet big-endian


W.MUL.MAT.32.L


Wide multiply matrix signed quadlet little-endian


W.MUL.MAT.C.8.B


Wide multiply matrix signed complex byte big-endian


W.MUL.MAT.C.8.L


Wide multiply matrix signed complex byte little-endian


W.MUL.MAT.C.16.B


Wide multiply matrix signed complex doublet big-endian


W.MUL.MAT.C.16.L


Wide multiply matrix signed complex doublet little-endian


W.MUL.MAT.M.8.B


Wide multiply matrix mixed-signed byte big-endian


W.MUL.MAT.M.8.L


Wide multiply matrix mixed-signed byte little-endian


W.MUL.MAT.M.16.B


Wide multiply matrix mixed-signed doublet big-endian


W.MUL.MAT.M.16.L


Wide multiply matrix mixed-signed doublet little-endian


W.MUL.MAT.M.32.B


Wide multiply matrix mixed-signed quadlet big-endian


W.MUL.MAT.M.32.L


Wide multiply matrix mixed-signed quadlet little-endian


W.MUL.MAT.P.8.B


Wide multiply matrix polynomial byte big-endian


W.MUL.MAT.P.8.L


Wide multiply matrix polynomial byte little-endian


W.MUL.MAT.P.16.B


Wide multiply matrix polynomial doublet big-endian


W.MUL.MAT.P.16.L


Wide multiply matrix polynomial doublet little-endian


W.MUL.MAT.P.32.B


Wide multiply matrix polynomial quadlet big-endian


W.MUL.MAT.P.32.L


Wide multiply matrix polynomial quadlet little-endian


W.MUL.MAT.U.8.B


Wide multiply matrix unsigned byte big-endian


W.MUL.MAT.U.8.L


Wide multiply matrix unsigned byte little-endian


W.MUL.MAT.U.16.B


Wide multiply matrix unsigned doublet big-endian


W.MUL.MAT.U.16.L


Wide multiply matrix unsigned doublet little-endian


W.MUL.MAT.U.32.B


Wide multiply matrix unsigned quadlet big-endian


W.MUL.MAT.U.32.L


Wide multiply matrix unsigned quadlet little-endian



Selection 



class 


op 


type 


size 


order 


multiply 


W.MUL.MAT 


NONE MUP 


8 16 32 


B 










L 






C 


8 16 


B 










L 



Format 

W.op.size.order rd=rc,rb 
rd=wopsizeorder(rc,rb) 
31 2423 



1817 



1211 



| W.MINOR.order 



65 



rd 



21 0 



rc 



8 

sz-*- log(size) - 3 



rb 



W.op 



sz 
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i i 



m[rc](128*64/size) 
511 



128 



































































































































1 


f 1 


f 1 


f 1 

/ 


t 0 



127 



rb(128) 



rd(128) 0 
Wide multiply matrix 
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Definition 



r 



1480 



def mul(size,h,vs,v,i,ws,w,j) as
    mul ← ((vs & v_{size-1+i})^{h-size} || v_{size-1+i..i}) * ((ws & w_{size-1+j})^{h-size} || w_{size-1+j..j})
enddef



def c ← PolyMultiply(size,a,b) as
    p[0] ← 0^{2*size}
    for k ← 0 to size-1
        p[k+1] ← p[k] xor a_{k} ? (0^{size-k} || b || 0^{k}) : 0^{2*size}
    endfor
    c ← p[size]
enddef

def WideMultiplyMatrix(major,op,gsize,rd,rc,rb)
    d ← RegRead(rd, 128)
    c ← RegRead(rc, 64)
    b ← RegRead(rb, 128)
    lgsize ← log(gsize)
    if c_{lgsize-4..0} ≠ 0 then
        raise AccessDisallowedByVirtualAddress
    endif
    if c_{2..lgsize-3} ≠ 0 then
        wsize ← (c and (0-c)) || 0^{4}
        t ← c and (c-1)
    else
        wsize ← 64
        t ← c
    endif
    lwsize ← log(wsize)
    if t_{lwsize+6-lgsize..lwsize-3} ≠ 0 then
        msize ← (t and (0-t)) || 0^{4}
        VirtAddr ← t and (t-1)
    else
        msize ← 128*wsize/gsize
        VirtAddr ← t
    endif
    case major of
        W.MINOR.B:
            order ← B
        W.MINOR.L:
            order ← L
    endcase
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C3SS op of 

        W.MUL.MAT.U.8, W.MUL.MAT.U.16, W.MUL.MAT.U.32,
        W.MUL.MAT.U.64:
            ms ← bs ← 0
        W.MUL.MAT.M.8, W.MUL.MAT.M.16, W.MUL.MAT.M.32,
        W.MUL.MAT.M.64:
            ms ← 0
            bs ← 1
        W.MUL.MAT.8, W.MUL.MAT.16, W.MUL.MAT.32,
        W.MUL.MAT.64, W.MUL.MAT.C.8, W.MUL.MAT.C.16,
        W.MUL.MAT.C.32, W.MUL.MAT.C.64:
            ms ← bs ← 1
        W.MUL.MAT.P.8, W.MUL.MAT.P.16, W.MUL.MAT.P.32,
        W.MUL.MAT.P.64:
    endcase
    m ← LoadMemory(c,VirtAddr,msize,order)
    h ← 2*gsize
    for i ← 0 to wsize-gsize by gsize
        q[0] ← 0^{2*gsize}
        for j ← 0 to vsize-gsize by gsize
            case op of
                W.MUL.MAT.P.8, W.MUL.MAT.P.16,
                W.MUL.MAT.P.32, W.MUL.MAT.P.64:
                    k ← i+wsize*j_{8..lgsize}
                    q[j+gsize] ← q[j] xor PolyMultiply(gsize, m_{k+gsize-1..k},
                                 b_{j+gsize-1..j})
                W.MUL.MAT.C.8, W.MUL.MAT.C.16, W.MUL.MAT.C.32,
                W.MUL.MAT.C.64:
                    if (~i) & gsize = 0 then
                        k ← i-(j&gsize)+wsize*j_{8..lgsize+1}
                        q[j+gsize] ← q[j] + mul(gsize,h,ms,m,k,bs,b,j)
                    else
                        k ← i+gsize+wsize*j_{8..lgsize+1}
                        q[j+gsize] ← q[j] - mul(gsize,h,ms,m,k,bs,b,j)
                    endif
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W.MUL.MAT.8, W.MUL.MAT.16, W.MUL.MAT.32, 
W.MUL.MAT.64, W.MUL.MAT.M.8, W.MUL.MAT.M.16, 
W.MUL.MAT.M.32, W.MUL.MAT.M.64, W.MUL.MAT.U.8, 
W.MUL.MAT.U.16, W.MUL.MAT.U.32, W.MUL.MAT.U.64 
                    q[j+gsize] ← q[j] + mul(gsize,h,ms,m,i+wsize*j_{8..lgsize},bs,b,j)
        endfor
        a_{2*gsize-1+2*i..2*i} ← q[vsize]
    endfor
    a_{127..2*wsize} ← 0
    RegWrite(rd, 128, a)
enddef
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Exceptions 



Access disallowed by virtual address 
Access disallowed by tag 
Access disallowed by global TB 
Access disallowed by local TB 
Access detail required by tag 
Access detail required by local TB 
Access detail required by global TB 
Local TB miss 
Global TB miss 
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Operation codes 



W.MUL.MAT.X.B 


Wide multiply matrix extract big-endian


W.MUL.MAT.X.L


Wide multiply matrix extract little-endian



Selection 



class 


op 


order 


Multiply matrix extract 


W.MUL.MAT.X 


B L 


Format 








W.op.order ra=rc,rd,rb 








ra=wop(rc,rd,rb) 

31 2423 


1817 


1211 65 


0 


| W.op.order | rd 


I 


rc I rb | 


ra | 


8 6 




6 6 


6 
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-r-. ?423 16151413121110 9 8 

| fsize | dpos | x | s | n | m | l | rnd |



gssp 



8 8 111112 
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023 m[rc](128»128/size) 



Xextract/ 



i r 



\extract/ 1 ^extrac^ Aextrac^ , \extract/ , r 



\extract/ 



FT 



\extrac{/ 



i r 



127 



rd(128) 



Xextract/ rb < 32 ) 



128 ra(128) 0 

Wide multiply matrix extract doublets 
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Definition 

def eb ← ebits(prec) as
    case prec of
        16:
            eb ← 5
        32:
            eb ← 8
        64:
            eb ← 11
        128:
            eb ← 15
    endcase
enddef

def eb ← ebias(prec) as
    eb ← 0 || 1^{ebits(prec)-1}
enddef

def fb ← fbits(prec) as
    fb ← prec - 1 - eb
enddef

def a ← F(prec, ai) as
    a.s ← ai_{prec-1}
    ae ← ai_{prec-2..fbits(prec)}
    af ← ai_{fbits(prec)-1..0}
    if ae = 1^{ebits(prec)} then
        if af = 0 then
            a.t ← INFINITY
        elseif af_{fbits(prec)-1} then
            a.t ← SNaN
            a.e ← -fbits(prec)
            a.f ← 1 || af_{fbits(prec)-2..0}
        else
            a.t ← QNaN
            a.e ← -fbits(prec)
            a.f ← af
        endif
    elseif ae = 0 then
        if af = 0 then
            a.t ← ZERO
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else 

            a.t ← NORM
            a.e ← 1-ebias(prec)-fbits(prec)
            a.f ← 0 || af
        endif
    else
        a.t ← NORM
        a.e ← ae-ebias(prec)-fbits(prec)
        a.f ← 1 || af
    endif
enddef



def a ← DEFAULTQNAN as
    a.s ← 0
    a.t ← QNAN
    a.e ← -1
    a.f ← 1
enddef

def a ← DEFAULTSNAN as
    a.s ← 0
    a.t ← SNAN
    a.e ← -1
    a.f ← 1
enddef
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def fadd(a.b) as faddr(a,b,N) endder 

def c ← faddr(a,b,round) as
    if a.t=NORM and b.t=NORM then
        // d,e are a,b with exponent aligned and fraction adjusted
        if a.e > b.e then
            d ← a
            e.t ← b.t
            e.s ← b.s
            e.e ← a.e
            e.f ← b.f || 0^{a.e-b.e}
        else if a.e < b.e then
            d.t ← a.t
            d.s ← a.s
            d.e ← b.e
            d.f ← a.f || 0^{b.e-a.e}
            e ← b
        endif
        c.t ← d.t
        c.e ← d.e
        if d.s = e.s then
            c.s ← d.s
            c.f ← d.f + e.f
        elseif d.f > e.f then
            c.s ← d.s
            c.f ← d.f - e.f
        elseif d.f < e.f then
            c.s ← e.s
            c.f ← e.f - d.f
        else
            c.s ← r=F
            c.t ← ZERO
        endif
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// priority is given to be operand for NaN propagation ' 
    elseif (b.t=SNAN) or (b.t=QNAN) then
        c ← b
    elseif (a.t=SNAN) or (a.t=QNAN) then
        c ← a
    elseif a.t=ZERO and b.t=ZERO then
        c.t ← ZERO
        c.s ← (a.s and b.s) or (round=F and (a.s or b.s))
    // NULL values are like zero, but do not combine with ZERO to alter sign
    elseif a.t=ZERO or a.t=NULL then
        c ← b
    elseif b.t=ZERO or b.t=NULL then
        c ← a
    elseif a.t=INFINITY and b.t=INFINITY then
        if a.s ≠ b.s then
            c ← DEFAULTSNAN // Invalid
        else
            c ← a
        endif
    elseif a.t=INFINITY then
        c ← a
    elseif b.t=INFINITY then
        c ← b
    else
        assert FALSE // should have covered all the cases above
    endif
enddef

def b ← fneg(a) as
    b.s ← ~a.s
    b.t ← a.t
    b.e ← a.e
    b.f ← a.f
enddef

def fsub(a,b) as fsubr(a,b,N) enddef

def fsubr(a,b,round) as faddr(a,fneg(b),round) enddef

def frsub(a,b) as frsubr(a,b,N) enddef

def frsubr(a,b,round) as faddr(fneg(a),b,round) enddef
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def c-*- fcom(a.b) as 

    if (a.t=SNAN) or (a.t=QNAN) or (b.t=SNAN) or (b.t=QNAN) then

    elseif a.t=INFINITY and b.t=INFINITY then
        if a.s ≠ b.s then
            c ← (a.s=0) ? G : L
        else
            c ← E
        endif
    elseif a.t=INFINITY then
        c ← (a.s=0) ? G : L
    elseif b.t=INFINITY then
        c ← (b.s=0) ? L : G
    elseif a.t=NORM and b.t=NORM then
        if a.s ≠ b.s then
            c ← (a.s=0) ? G : L
        else
            if a.e > b.e then
                af ← a.f
                bf ← b.f || 0^{a.e-b.e}
            else
                af ← a.f || 0^{b.e-a.e}
                bf ← b.f
            endif
            if af = bf then
                c ← E
            else
                c ← ((a.s=0) xor (af > bf)) ? G : L
            endif
        endif
    elseif a.t=NORM then
        c ← (a.s=0) ? G : L
    elseif b.t=NORM then
        c ← (b.s=0) ? G : L
    elseif a.t=ZERO and b.t=ZERO then
        c ← E
    else
        assert FALSE // should have covered all the cases above
    endif
enddef
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def c -*-fmul(a,b) as " 
    if a.t=NORM and b.t=NORM then
        c.s ← a.s xor b.s
        c.t ← NORM
        c.e ← a.e + b.e
        c.f ← a.f * b.f
    // priority is given to b operand for NaN propagation
    elseif (b.t=SNAN) or (b.t=QNAN) then
        c.s ← a.s xor b.s
        c.t ← b.t
        c.e ← b.e
        c.f ← b.f
    elseif (a.t=SNAN) or (a.t=QNAN) then
        c.s ← a.s xor b.s
        c.t ← a.t
        c.e ← a.e
        c.f ← a.f
    elseif a.t=ZERO and b.t=INFINITY then
        c ← DEFAULTSNAN // Invalid
    elseif a.t=INFINITY and b.t=ZERO then
        c ← DEFAULTSNAN // Invalid
    elseif a.t=ZERO or b.t=ZERO then
        c.s ← a.s xor b.s
        c.t ← ZERO
    else
        assert FALSE // should have covered all the cases above
    endif
enddef
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def c fdivr(a t b) as 7 
    if a.t=NORM and b.t=NORM then
        c.s ← a.s xor b.s
        c.t ← NORM
        c.e ← a.e - b.e + 256
        c.f ← (a.f || 0^{256}) / b.f
    // priority is given to b operand for NaN propagation
    elseif (b.t=SNAN) or (b.t=QNAN) then
        c.s ← a.s xor b.s
        c.t ← b.t
        c.e ← b.e
        c.f ← b.f
    elseif (a.t=SNAN) or (a.t=QNAN) then
        c.s ← a.s xor b.s
        c.t ← a.t
        c.e ← a.e
        c.f ← a.f
    elseif a.t=ZERO and b.t=INFINITY then
        c ← DEFAULTSNAN // Invalid
    elseif a.t=INFINITY and b.t=INFINITY then
        c ← DEFAULTSNAN // Invalid
    elseif a.t=ZERO then
        c.s ← a.s xor b.s
        c.t ← ZERO
    elseif a.t=INFINITY then
        c.s ← a.s xor b.s
        c.t ← INFINITY
    else
        assert FALSE // should have covered all the cases above
    endif
enddef

def msb ← findmsb(a) as
    MAXF ← 2^{18} // Largest possible f value after matrix multiply
    for j ← 0 to MAXF
        if a_{MAXF-1..j} = (0^{MAXF-1-j} || 1) then
            msb ← j
        endif
    endfor
enddef
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Def ai-*- PackF(prec,a,round) as 
case a.t of 
NORM: 

msb findmsb(a.f) 

m -*-msb-1-fbits(prec) //1sb for normal 

*~ ( :l1Tl™ a r e d r bi ' S(preC » " 
if rb < 0 then 

aifr-^ a.fmsb-i..o||O rb 
eadj -*- 0 

else 

case round of 

C: 

s^0 msb - rb ||(^ a .s)rb 

s ^-o msb - r b|| (as) rb 
N, NONE: 

s ^0msb-rb|| ~ a .f rb ||a.f r rb T 1 

A. 

if a.frb-L.o * 0 then 

raise FloatingPointArithmetic // Inexact 
endif 

s-*-0 

Z: 

endcase 

v^(0||a.f msbi . 0 ) + (0|| S ) 
if v msb =l then 

aifr Vmsb.! rb 

eadj 0 

else 

aifr-*- 0 fbits (prec) 
eadj 1 
endif 
endif 

aien — a.e + msb - 1 + eadj + ebias(prec) 
if aien < 0 then w ' 

if round = NONE then 

ai-*-a.s||0 ebi,s (P re c)|| ajfr 

else 

raise FloatingPointArithmetic //Underflow 
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endif 

elseif aien > 1^{ebits(prec)} then
if round = NONE then 

//default: round-to-nearest overflow handling 
ai a.s| | lebits(prec) 1 1 Qfbits(prec) 

else 

raise FloatingPointArithmetic // Overflow 
endif 

else 

ai-*- a.s|| aien©bii S (prec)-i..o 1 1 aifr 
endif 

SNAN: 

if round * NONE then 

raise FloatingPointArithmetic //Invalid 
endif 

if -a.e < fbits(prec) then 

ai -»-a.s|| iebits(prec>| | a .f- a .e-1..oll 0 fc "*<P wo )* a - s 

else 

Isb a.f. a .e-1-fbits(prec)+1..O*0 

ai a.s 1 1 lebits(prec)| |a.f- a .e-1..-a.8-1-fbits(prec)+2 ||1sb 
endif 
QNAN: 

if -a.e < fbits(prec) then 

ai-*- a.s|l iet>its(prec>|| a .Ue-1..0||0 fbU8 <P wc >* M 

else 

1sb-*- a.f- a .8-1-fbits(precH..O* 0 
ai -*-a.s||1 ebits <P re <»|| a.f-a.e-1..-a.e-1-fbit 6 (prec)+2||1sb 

endif 
ZERO: 

ai-*- a.s 1 1 o ebi,s (P re °)|| o fbits (P rec ) 
INFINITY: 

ai a.s 1 1 i 9bits <P re c) 1 1 o^'ts^P^ 0 ) 

endcase 
enddef
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ai-«- fsinkr(prec, a, round) as 
case a.t of 
NORM: 

msb-«- findmsb(a.f) 

rb-«--a.e 

if rb < 0 then 

aifr-»- a.f ms b..o||0- rb 
aims-*- msb - rb 

else 

case round of 
C.C.D: 

s -«-0 msb - rb ||(~ai.s) rb 

F.F.D: 

s ^_ 0 msb-rb|, (ajs)rb 
N, NONE: 

s^-O msb - rb |hai.f rb ||ai.f[ b -l 

A. 

if ai.frb-i..o 0 then 

raise FloatingPointArithmetic // Inexact 
endif 

s -*-0 

Z, Z.D: 

s -«-0 

endcase 

v^(0||a.f msb .. 0 ) + (0|| S ) 
>f v msb =l then 

aims msb + 1 - rb 

else 

aims-*- msb - rb 
endif 

aifr-*- v a im S .. rb 
endif 

if aims > prec then 
case round of 

CD, F.D, NONE, Z.D: 

ai-*- a.s||(~as)prec-i 
C,F,N,X,Z: 

raise FloatingPointArithmetic // Overflow 

endcase 
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elseif a.s = 0 then f 
ai -«-aifr 

else 

ai -»--aifr 

endif 
ZERO: 

a j-#_ 0P r ec 

SNAN, QNAN: 
case round of 

CD, F.D, NONE, Z.D: 

ai-^- 0P rec 
C, F, N, X, Z: 

raise FloatingPointArithmetic // Invalid

endcase 
INFINITY: 

case round of 

CD, F.D, NONE, Z.D: 

ai-*-a.s||(~as)P rec - 1 
C, F, N, X, Z: 

raise FloatingPointArithmetic // Invalid 

endcase 

endcase 
enddef 
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def c frecrest(a) as 
    b.s ← 0
    b.t ← NORM
    b.e ← 0
    b.f ← 1
    c ← fest(fdiv(b,a))
enddef

def c ← frsqrest(a) as
    b.s ← 0
    b.t ← NORM
    b.e ← 0
    b.f ← 1
    c ← fest(fsqr(fdiv(b,a)))
enddef
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def c -*-fest(a) as 
    if (a.t=NORM) then
        msb ← findmsb(a.f)
        a.e ← a.e + msb - 13
        a.f ← a.f_{msb..msb-12} || 1
    else
        c ← a
    endif
enddef

def fsqr(a) as
    if (a.t=NORM) and (a.s=0) then
        c.s ← 0
        c.t ← NORM
        if (a.e_{0} = 1) then
            c.e ← (a.e-127) / 2
            c.f ← sqr(a.f || 0^{127})
        else
            c.e ← (a.e-128) / 2
            c.f ← sqr(a.f || 0^{128})
        endif
    elseif (a.t=SNAN) or (a.t=QNAN) or a.t=ZERO or ((a.t=INFINITY) and (a.s=0)) then
        c ← a
    elseif ((a.t=NORM) or (a.t=INFINITY)) and (a.s=1) then
        c ← DEFAULTSNAN // Invalid
    else
        assert FALSE // should have covered all the cases above
    endif
enddef
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Operation codes 



G.ADD.8


Group add bytes


G.ADD.16


Group add doublets


G.ADD.32


Group add quadlets


G.ADD.64


Group add octlets


G.ADD.128


Group add hexlet


G.ADD.L.8


Group add limit signed bytes


G.ADD.L.16


Group add limit signed doublets


G.ADD.L.32


Group add limit signed quadlets


G.ADD.L.64


Group add limit signed octlets


G.ADD.L.128


Group add limit signed hexlet


G.ADD.L.U.8


Group add limit unsigned bytes


G.ADD.L.U.16


Group add limit unsigned doublets


G.ADD.L.U.32


Group add limit unsigned quadlets


G.ADD.L.U.64


Group add limit unsigned octlets


G.ADD.L.U.128


Group add limit unsigned hexlet


G.ADD.8.O


Group add signed bytes check overflow


G.ADD.16.O


Group add signed doublets check overflow


G.ADD.32.O


Group add signed quadlets check overflow


G.ADD.64.O


Group add signed octlets check overflow


G.ADD.128.O


Group add signed hexlet check overflow


G.ADD.U.8.O


Group add unsigned bytes check overflow


G.ADD.U.16.O


Group add unsigned doublets check overflow


G.ADD.U.32.O


Group add unsigned quadlets check overflow


G.ADD.U.64.O


Group add unsigned octlets check overflow


G.ADD.U.128.O


Group add unsigned hexlet check overflow



FIG. 26A 



Format 

G.op.size



rd=rc,rb 



rd=gopsize(rc,rb) 
31 



24 23 



18 17 



12 11 



6 5 



G.size 

8 



rd 



rc 



rb 



_op_ 



FIG. 26B 



Definition 



def Group(op,size,rd,rc,rb)
    c ← RegRead(rc, 128)
    b ← RegRead(rb, 128)
    case op of
        G.ADD:
            for i ← 0 to 128-size by size
                a_{i+size-1..i} ← c_{i+size-1..i} + b_{i+size-1..i}
            endfor
        G.ADD.L:
            for i ← 0 to 128-size by size
                t ← (c_{i+size-1} || c_{i+size-1..i}) + (b_{i+size-1} || b_{i+size-1..i})
                a_{i+size-1..i} ← (t_{size} ≠ t_{size-1}) ? (t_{size} || (t_{size-1})^{size-1}) : t_{size-1..0}
            endfor
        G.ADD.L.U:
            for i ← 0 to 128-size by size
                t ← (0^{1} || c_{i+size-1..i}) + (0^{1} || b_{i+size-1..i})
                a_{i+size-1..i} ← (t_{size} ≠ 0) ? (1^{size}) : t_{size-1..0}
            endfor
        G.ADD.O:
            for i ← 0 to 128-size by size
                t ← (c_{i+size-1} || c_{i+size-1..i}) + (b_{i+size-1} || b_{i+size-1..i})
                if t_{size} ≠ t_{size-1} then
                    raise FixedPointArithmetic
                endif
                a_{i+size-1..i} ← t_{size-1..0}
            endfor
        G.ADD.U.O:
            for i ← 0 to 128-size by size
                t ← (0^{1} || c_{i+size-1..i}) + (0^{1} || b_{i+size-1..i})
                if t_{size} ≠ 0 then
                    raise FixedPointArithmetic
                endif
                a_{i+size-1..i} ← t_{size-1..0}
            endfor
    endcase
    RegWrite(rd, 128, a)
enddef
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Operation codes 



G.SET.AND.E.8 


Group set and equal zero bytes 


G.SET.AND.E.16 


Group set and equal zero doublets 


G.SET.AND.E.32 


Group set and equal zero quadlets 


G.SET.AND.E.64 


Group set and equal zero octlets 


G.SET.AND.E.128 


Group set and equal zero hexlet 


G.SET.AND.NE.8 


Group set and not equal zero bytes 


G.SET.AND.NE. 16 


Group set and not equal zero doublets 


G.SET.AND.NE.32 


Group set and not equal zero quadlets 


G.SET.AND.NE. 64 


Group set and not equal zero octlets 


G.SET.AND.NE. 128 


Group set and not equal zero hexlet 


G.SET.E.8 


Group set equal bytes 


G.SET.E.16 


Group set equal doublets 


G.SET.E.32 


Group set equal quadlets 


G.SET.E.64 


Group set equal octlets 


G.SET.E.128 


Group set equal hexlet 


G.SET.GE.8 


Group set greater equal signed bytes 


G.SET.GE.16 


Group set greater equal signed doublets 


G.SET.GE.32 


Group set greater equal signed quadlets 


G.SET.GE.64 


Group set greater equal signed octlets 


G.SET.GE.128 


Group set greater equal signed hexlet 


G.SET.GE.U.8 


Group set greater equal unsigned bytes 


G.SET.GE.U.16 


Group set greater equal unsigned doublets 


G.SET.GE.U.32 


Group set greater equal unsigned quadlets 


G.SET.GE.U.64 


Group set greater equal unsigned octlets 


G.SET.GE.U.128 


Group set greater equal unsigned hexlet 


G.SET.L.8 


Group set signed less bytes 


G.SET.L.16 


Group set signed less doublets 


G.SET.L.32 


Group set signed less quadlets 


G.SET.L.64 


Group set signed less octlets 


G.SET.L.128 


Group set signed less hexlet 


G.SET.L.U.8 


Group set less unsigned bytes 


G.SET.L.U.16 


Group set less unsigned doublets 


G.SET.L.U.32 


Group set less unsigned quadlets 


G.SET.L.U.64 


Group set less unsigned octlets 


G.SET.L.U.128 


Group set less unsigned hexlet 


G.SET.NE.8 


Group set not equal bytes 


G.SET.NE.16 


Group set not equal doublets 


G.SET.NE.32 


Group set not equal quadlets 


G.SET.NE.64 


Group set not equal octlets 


G.SET.NE.128 


Group set not equal hexlet 


G.SUB.8 


Group subtract bytes 


G.SUB.8.O


Group subtract signed bytes check overflow 
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<jr.bUo.lo 


Group subtract doublets


G.SUB.16.O


Group subtract signed doublets check overflow


G.SUB.32


Group subtract quadlets


G.SUB.32.O


Group subtract signed quadlets check overflow


G.SUB.64


Group subtract octlets


G.SUB.64.O


Group subtract signed octlets check overflow


G.SUB.128


Group subtract hexlet


G.SUB.128.O


Group subtract signed hexlet check overflow


G.SUB.L.8


Group subtract limit signed bytes


G.SUB.L.16


Group subtract limit signed doublets


G.SUB.L.32


Group subtract limit signed quadlets


G.SUB.L.64


Group subtract limit signed octlets


G.SUB.L.128


Group subtract limit signed hexlet


G.SUB.L.U.8


Group subtract limit unsigned bytes


G.SUB.L.U.16


Group subtract limit unsigned doublets


G.SUB.L.U.32


Group subtract limit unsigned quadlets


G.SUB.L.U.64


Group subtract limit unsigned octlets


G.SUB.L.U.128


Group subtract limit unsigned hexlet


G.SUB.U.8.O


Group subtract unsigned bytes check overflow


G.SUB.U.16.O


Group subtract unsigned doublets check overflow


G.SUB.U.32.O


Group subtract unsigned quadlets check overflow


G.SUB.U.64.O


Group subtract unsigned octlets check overflow


G.SUB.U.128.O


Group subtract unsigned hexlet check overflow
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Format 



G.op.size rd=rb,rc 



rd=gopsize(rb,rc) 
31 



G.size 

8 



24 23 



18 17 



12 11 



6 5 



rd 



rc 



rb 



op 
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Definition 

def GroupReversed(op,size,rd,rc,rb)
    c ← RegRead(rc, 128)
    b ← RegRead(rb, 128)
    case op of
        G.SUB:
            for i ← 0 to 128-size by size
                a_{i+size-1..i} ← b_{i+size-1..i} - c_{i+size-1..i}
            endfor
        G.SUB.L:
            for i ← 0 to 128-size by size
                t ← (b_{i+size-1} || b_{i+size-1..i}) - (c_{i+size-1} || c_{i+size-1..i})
                a_{i+size-1..i} ← (t_{size} ≠ t_{size-1}) ? (t_{size} || (t_{size-1})^{size-1}) : t_{size-1..0}
            endfor
        G.SUB.L.U:
            for i ← 0 to 128-size by size
                t ← (0^{1} || b_{i+size-1..i}) - (0^{1} || c_{i+size-1..i})
                a_{i+size-1..i} ← (t_{size} ≠ 0) ? 0^{size} : t_{size-1..0}
            endfor
        G.SUB.O:
            for i ← 0 to 128-size by size
                t ← (b_{i+size-1} || b_{i+size-1..i}) - (c_{i+size-1} || c_{i+size-1..i})
                if (t_{size} ≠ t_{size-1}) then
                    raise FixedPointArithmetic
                endif
                a_{i+size-1..i} ← t_{size-1..0}
            endfor
        G.SUB.U.O:
            for i ← 0 to 128-size by size
                t ← (0^{1} || b_{i+size-1..i}) - (0^{1} || c_{i+size-1..i})
                if (t_{size} ≠ 0) then
                    raise FixedPointArithmetic
                endif
                a_{i+size-1..i} ← t_{size-1..0}
            endfor
        G.SET.E:
            for i ← 0 to 128-size by size
                a_{i+size-1..i} ← (b_{i+size-1..i} = c_{i+size-1..i})^{size}
            endfor
        G.SET.NE:
            for i ← 0 to 128-size by size
                a_{i+size-1..i} ← (b_{i+size-1..i} ≠ c_{i+size-1..i})^{size}
            endfor
        G.SET.AND.E:
            for i ← 0 to 128-size by size
                a_{i+size-1..i} ← ((b_{i+size-1..i} and c_{i+size-1..i}) = 0)^{size}
            endfor
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G.SET.AND.NE: 

            for i ← 0 to 128-size by size
                a_{i+size-1..i} ← ((b_{i+size-1..i} and c_{i+size-1..i}) ≠ 0)^{size}
            endfor
        G.SET.L:
            for i ← 0 to 128-size by size
                a_{i+size-1..i} ← ((rc = rb) ? (b_{i+size-1..i} < 0) : (b_{i+size-1..i} < c_{i+size-1..i}))^{size}
            endfor
        G.SET.GE:
            for i ← 0 to 128-size by size
                a_{i+size-1..i} ← ((rc = rb) ? (b_{i+size-1..i} ≥ 0) : (b_{i+size-1..i} ≥ c_{i+size-1..i}))^{size}
            endfor
        G.SET.L.U:
            for i ← 0 to 128-size by size
                a_{i+size-1..i} ← ((rc = rb) ? (b_{i+size-1..i} > 0) :
                    ((0 || b_{i+size-1..i}) < (0 || c_{i+size-1..i})))^{size}
            endfor
        G.SET.GE.U:
            for i ← 0 to 128-size by size
                a_{i+size-1..i} ← ((rc = rb) ? (b_{i+size-1..i} ≤ 0) :
                    ((0 || b_{i+size-1..i}) ≥ (0 || c_{i+size-1..i})))^{size}
            endfor
    endcase
    RegWrite(rd, 128, a)
enddef
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Operation codes 



E.CON.8


Ensemble convolve signed bytes


E.CON.16


Ensemble convolve signed doublets


E.CON.32


Ensemble convolve signed quadlets


E.CON.64


Ensemble convolve signed octlets


E.CON.C.8


Ensemble convolve complex bytes


E.CON.C.16


Ensemble convolve complex doublets


E.CON.C.32


Ensemble convolve complex quadlets


E.CON.M.8


Ensemble convolve mixed-signed bytes


E.CON.M.16


Ensemble convolve mixed-signed doublets


E.CON.M.32


Ensemble convolve mixed-signed quadlets


E.CON.M.64


Ensemble convolve mixed-signed octlets


E.CON.U.8


Ensemble convolve unsigned bytes


E.CON.U.16


Ensemble convolve unsigned doublets


E.CON.U.32


Ensemble convolve unsigned quadlets


E.CON.U.64


Ensemble convolve unsigned octlets


E.DIV.64


Ensemble divide signed octlets


E.DIV.U.64


Ensemble divide unsigned octlets


E.MUL.8


Ensemble multiply signed bytes


E.MUL.16


Ensemble multiply signed doublets


E.MUL.32


Ensemble multiply signed quadlets


E.MUL.64


Ensemble multiply signed octlets


E.MUL.SUM.8


Ensemble multiply sum signed bytes


E.MUL.SUM.16


Ensemble multiply sum signed doublets


E.MUL.SUM.32


Ensemble multiply sum signed quadlets


E.MUL.SUM.64


Ensemble multiply sum signed octlets


E.MUL.C.8


Ensemble complex multiply bytes


E.MUL.C.16


Ensemble complex multiply doublets


E.MUL.C.32


Ensemble complex multiply quadlets


E.MUL.M.8


Ensemble multiply mixed-signed bytes


E.MUL.M.16


Ensemble multiply mixed-signed doublets


E.MUL.M.32


Ensemble multiply mixed-signed quadlets


E.MUL.M.64


Ensemble multiply mixed-signed octlets


E.MUL.P.8


Ensemble multiply polynomial bytes


E.MUL.P.16


Ensemble multiply polynomial doublets


E.MUL.P.32


Ensemble multiply polynomial quadlets


E.MUL.P.64


Ensemble multiply polynomial octlets


E.MUL.SUM.C.8


Ensemble multiply sum complex bytes


E.MUL.SUM.C.16


Ensemble multiply sum complex doublets


E.MUL.SUM.C.32


Ensemble multiply sum complex quadlets


E.MUL.SUM.M.8


Ensemble multiply sum mixed-signed bytes


E.MUL.SUM.M.16


Ensemble multiply sum mixed-signed doublets


E.MUL.SUM.M.32


Ensemble multiply sum mixed-signed quadlets


E.MUL.SUM.M.64


Ensemble multiply sum mixed-signed octlets
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E.MUL.SUM.U.8 
E.MUL.SUM.U.16 


Ensemble multiply sum unsigned bytes 
Ensemble multiply sum unsigned doublets 


E.MUL.SUM.U.32 
E.MUL.SUM.U.64 
E.MUL.U.8 


Ensemble multiply sum unsigned quadlets 
Ensemble multiply sum unsigned octlets 


E.MUL.U.16 


Ensemble multiply unsigned bytes 
Ensemble multiply unsigned doublets 


E.MUL.U.32 
E.MUL.U.64 


Ensemble multiply unsigned quadlets 
Ensemble multiply unsigned octlets 
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Format 



E.op.size rd=rc,rb 
rd=eopsize(rc,rb) 



31 24 23 18 17 12 11 65 

| E.size | rd | rc | rb |



8 



_op_ 
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Definition 

def mul(size,h,vs,v,i,ws,w,j) as
    mul ← ((vs & v_{size-1+i})^{h-size} || v_{size-1+i..i}) * ((ws & w_{size-1+j})^{h-size} || w_{size-1+j..j})
enddef

def c ← PolyMultiply(size,a,b) as
    p[0] ← 0^{2*size}
    for k ← 0 to size-1
        p[k+1] ← p[k] xor a_{k} ? (0^{size-k} || b || 0^{k}) : 0^{2*size}
    endfor
    c ← p[size]
enddef

def Ensemble(op,size,rd,rc,rb)
    c <- RegRead(rc, 128)
    b <- RegRead(rb, 128)
    case op of
        E.MUL, E.MUL.C, E.MUL.SUM, E.MUL.SUM.C, E.CON, E.CON.C, E.DIV:
            cs <- bs <- 1
        E.MUL.M, E.MUL.SUM.M, E.CON.M:
            cs <- 0
            bs <- 1
        E.MUL.U, E.MUL.SUM.U, E.CON.U, E.DIV.U, E.MUL.P:
            cs <- bs <- 0
    endcase
    case op of
        E.MUL, E.MUL.U, E.MUL.M:
            for i <- 0 to 64-size by size
                a_{2*(i+size)-1..2*i} <- mul(size,2*size,cs,c,i,bs,b,i)
            endfor
        E.MUL.P:
            for i <- 0 to 64-size by size
                a_{2*(i+size)-1..2*i} <- PolyMultiply(size,c_{size-1+i..i},b_{size-1+i..i})
            endfor
        E.MUL.C:
            for i <- 0 to 64-size by size
                if (i and size) = 0 then
                    p <- mul(size,2*size,1,c,i,1,b,i) - mul(size,2*size,1,c,i+size,1,b,i+size)
                else
                    p <- mul(size,2*size,1,c,i,1,b,i+size) + mul(size,2*size,1,c,i,1,b,i+size)
                endif
                a_{2*(i+size)-1..2*i} <- p
            endfor
        E.MUL.SUM, E.MUL.SUM.U, E.MUL.SUM.M:
            p[0] <- 0^{128}
            for i <- 0 to 128-size by size
                p[i+size] <- p[i] + mul(size,128,cs,c,i,bs,b,i)
            endfor
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a<-p[128] 
        E.MUL.SUM.C:
            p[0] <- 0^{64}
            p[size] <- 0^{64}
            for i <- 0 to 128-size by size
                if (i and size) = 0 then
                    p[i+2*size] <- p[i] + mul(size,64,1,c,i,1,b,i)
                                        - mul(size,64,1,c,i+size,1,b,i+size)
                else
                    p[i+2*size] <- p[i] + mul(size,64,1,c,i,1,b,i+size)
                                        + mul(size,64,1,c,i+size,1,b,i)
                endif
            endfor
            a <- p[128+size] || p[128]
        E.CON, E.CON.U, E.CON.M:
            p[0] <- 0^{128}
            for j <- 0 to 64-size by size
                for i <- 0 to 64-size by size
                    p[j+size]_{2*(i+size)-1..2*i} <- p[j]_{2*(i+size)-1..2*i} +
                        mul(size,2*size,cs,c,i+64-j,bs,b,j)
                endfor
            endfor
            a <- p[64]
        E.CON.C:
            p[0] <- 0^{128}
            for j <- 0 to 64-size by size
                for i <- 0 to 64-size by size
                    if ((~i) and j and size) = 0 then
                        p[j+size]_{2*(i+size)-1..2*i} <- p[j]_{2*(i+size)-1..2*i} +
                            mul(size,2*size,1,c,i+64-j,1,b,j)
                    else
                        p[j+size]_{2*(i+size)-1..2*i} <- p[j]_{2*(i+size)-1..2*i} -
                            mul(size,2*size,1,c,i+64-j+2*size,1,b,j)
                    endif
                endfor
            endfor
            a <- p[64]
        E.DIV:
            if (b = 0) or ( (c = (1 || 0^{63})) and (b = 1^{64}) ) then
                a <- undefined
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E.DIV.U: 

            if b = 0 then
                a <- undefined
            else
                q <- (0 || c) / (0 || b)
                r <- c - (0 || q)*(0 || b)
                a <- r_{63..0} || q_{63..0}
            endif
    endcase
    RegWrite(rd, 128, a)
enddef
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Floating-point function Definitions 

def eb <- ebits(prec) as
    case prec of
        16:
            eb <- 5
        32:
            eb <- 8
        64:
            eb <- 11
        128:
            eb <- 15
    endcase
enddef

def eb <- ebias(prec) as
    eb <- 0 || 1^{ebits(prec)-1}
enddef

def fb <- fbits(prec) as 

fb <— prec - 1 - eb 
enddef 

def a <- F(prec, ai) as
    a.s <- ai_{prec-1}
    ae <- ai_{prec-2..fbits(prec)}
    af <- ai_{fbits(prec)-1..0}
    if ae = 1^{ebits(prec)} then
        if af = 0 then
            a.t <- INFINITY
        elseif af_{fbits(prec)-1} then
            a.t <- SNaN
            a.e <- -fbits(prec)
            a.f <- 1 || af_{fbits(prec)-2..0}
        else
            a.t <- QNaN
            a.e <- -fbits(prec)
            a.f <- af
        endif
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elseif ae = 0 then 
        if af = 0 then
            a.t <- ZERO
        else
            a.t <- NORM
            a.e <- 1-ebias(prec)-fbits(prec)
            a.f <- 0 || af
        endif
    else
        a.t <- NORM
        a.e <- ae-ebias(prec)-fbits(prec)
        a.f <- 1 || af
    endif
enddef

def a <- DEFAULTQNAN as 

a.s <r- 0 

a.t <~ QNAN 

a.e <- -1 

a.f <- 1 
enddef 

def a <- DEFAULTSNAN as
    a.s <- 0
    a.t <- SNAN
    a.e <- -1
    a.f <- 1
enddef

def fadd(a,b) as faddr(a,b,N) enddef

def c <- faddr(a,b,round) as
    if a.t=NORM and b.t=NORM then
        // d,e are a,b with exponent aligned and fraction adjusted
        if a.e > b.e then
            d <- a
            e.t <- b.t
            e.s <- b.s
            e.e <- a.e
            e.f <- b.f || 0^{a.e-b.e}
        else if a.e < b.e then
            d.t <- a.t
            d.s <- a.s
            d.e <- b.e
            d.f <- a.f || 0^{b.e-a.e}
            e <- b
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endif 
        c.t <- d.t
        c.e <- d.e
        if d.s = e.s then
            c.s <- d.s
            c.f <- d.f + e.f
        elseif d.f > e.f then
            c.s <- d.s
            c.f <- d.f - e.f
        elseif d.f < e.f then
            c.s <- e.s
            c.f <- e.f - d.f
        else
            c.s <- r=F
            c.t <- ZERO
        endif
    // priority is given to b operand for NaN propagation
    elseif (b.t=SNAN) or (b.t=QNAN) then
        c <- b
    elseif (a.t=SNAN) or (a.t=QNAN) then
        c <- a
    elseif a.t=ZERO and b.t=ZERO then
        c.t <- ZERO
        c.s <- (a.s and b.s) or (round=F and (a.s or b.s))
    // NULL values are like zero, but do not combine with ZERO to alter sign
    elseif a.t=ZERO or a.t=NULL then
        c <- b
    elseif b.t=ZERO or b.t=NULL then
        c <- a
    elseif a.t=INFINITY and b.t=INFINITY then
        if a.s ≠ b.s then
            c <- DEFAULTSNAN // Invalid
        else
            c <- a
        endif
    elseif a.t=INFINITY then
        c <- a
    elseif b.t=INFINITY then
        c <- b
    else
        assert FALSE // should have covered all the cases above
    endif
enddef

def b <- fneg(a) as
    b.s <- ~a.s
    b.t <- a.t
    b.e <- a.e
    b.f <- a.f
enddef
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def fsubr(a,b,round) as faddr(a,fheg(b),round) enddef 

def frsub(a,b) as frsubr(a,b,N) enddef

def frsubr(a,b,round) as faddr(fneg(a),b,round) enddef

def c <- fcom(a,b) as
    if (a.t=SNAN) or (a.t=QNAN) or (b.t=SNAN) or (b.t=QNAN) then
        c <- U
    elseif a.t=INFINITY and b.t=INFINITY then
        if a.s ≠ b.s then
            c <- (a.s=0) ? G : L
        else
            c <- E
        endif
    elseif a.t=INFINITY then
        c <- (a.s=0) ? G : L
    elseif b.t=INFINITY then
        c <- (b.s=0) ? G : L
    elseif a.t=NORM and b.t=NORM then
        if a.s ≠ b.s then
            c <- (a.s=0) ? G : L
        else
            if a.e > b.e then
                af <- a.f
                bf <- b.f || 0^{a.e-b.e}
            else
                af <- a.f || 0^{b.e-a.e}
                bf <- b.f
            endif
            if af = bf then
                c <- E
            else
                c <- ((a.s=0) ^ (af > bf)) ? G : L
            endif
        endif
    elseif a.t=NORM then
        c <- (a.s=0) ? G : L
    elseif b.t=NORM then
        c <- (b.s=0) ? G : L
    elseif a.t=ZERO and b.t=ZERO then
        c <- E
    else
        assert FALSE // should have covered all the cases above
    endif
enddef
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def c <- fmul(a,b) as 

    if a.t=NORM and b.t=NORM then
        c.s <- a.s ^ b.s
        c.t <- NORM
        c.e <- a.e + b.e
        c.f <- a.f * b.f
    // priority is given to b operand for NaN propagation
    elseif (b.t=SNAN) or (b.t=QNAN) then
        c.s <- a.s ^ b.s
        c.t <- b.t
        c.e <- b.e
        c.f <- b.f
    elseif (a.t=SNAN) or (a.t=QNAN) then
        c.s <- a.s ^ b.s
        c.t <- a.t
        c.e <- a.e
        c.f <- a.f
    elseif a.t=ZERO and b.t=INFINITY then
        c <- DEFAULTSNAN // Invalid
    elseif a.t=INFINITY and b.t=ZERO then
        c <- DEFAULTSNAN // Invalid
    elseif a.t=ZERO or b.t=ZERO then
        c.s <- a.s ^ b.s
        c.t <- ZERO
    else
        assert FALSE // should have covered all the cases above
    endif
enddef

def c <- fdivr(a,b) as
    if a.t=NORM and b.t=NORM then
        c.s <- a.s ^ b.s
        c.t <- NORM
        c.e <- a.e - b.e + 256
        c.f <- (a.f || 0^{256}) / b.f
    // priority is given to b operand for NaN propagation
    elseif (b.t=SNAN) or (b.t=QNAN) then
        c.s <- a.s ^ b.s
        c.t <- b.t
        c.e <- b.e
        c.f <- b.f
    elseif (a.t=SNAN) or (a.t=QNAN) then
        c.s <- a.s ^ b.s
        c.t <- a.t
        c.e <- a.e
        c.f <- a.f



FIG. 29-5 



    elseif a.t=ZERO and b.t=ZERO then
        c <- DEFAULTSNAN // Invalid
    elseif a.t=INFINITY and b.t=INFINITY then
        c <- DEFAULTSNAN // Invalid
    elseif a.t=ZERO then
        c.s <- a.s ^ b.s
        c.t <- ZERO
    elseif a.t=INFINITY then
        c.s <- a.s ^ b.s
        c.t <- INFINITY
    else
        assert FALSE // should have covered all the cases above
    endif
enddef

def msb <- findmsb(a) as
    MAXF <- 2^{18} // Largest possible f value after matrix multiply
    for j <- 0 to MAXF
        if a_{MAXF-1..j} = (0^{MAXF-1-j} || 1) then
            msb <- j
        endif
    endfor
enddef

def ai <- PackF(prec,a,round) as
    case a.t of
        NORM:
            msb <- findmsb(a.f)
            rn <- msb-1-fbits(prec) // lsb for normal
            rdn <- -ebias(prec)-a.e-1-fbits(prec) // lsb if a denormal
            rb <- (rn > rdn) ? rn : rdn
            if rb < 0 then
                aifr <- a.f_{msb-1..0} || 0^{-rb}
                eadj <- 0
            else
                case round of
                    C:
                        s <- 0^{msb-rb} || (~a.s)^{rb}
                    F:
                        s <- 0^{msb-rb} || (a.s)^{rb}
                    N, NONE:
                        s <- 0^{msb-rb} || ~a.f_{rb} || a.f_{rb}^{rb-1}
                    X:
                        if a.f_{rb-1..0} ≠ 0 then
                            raise FloatingPointArithmetic // Inexact
                        endif
                        s <- 0
                    Z:
                        s <- 0
                endcase
                v <- (0 || a.f_{msb..0}) + (0 || s)
                if v_{msb} = 1 then
                    aifr <- v_{msb-1..rb}
                    eadj <- 0
                else
                    aifr <- 0^{fbits(prec)}
                    eadj <- 1
                endif
            endif
            aien <- a.e + msb - 1 + eadj + ebias(prec)
            if aien < 0 then
                if round = NONE then
                    ai <- a.s || 0^{ebits(prec)} || aifr
                else
                    raise FloatingPointArithmetic //Underflow
                endif
            elseif aien > 1^{ebits(prec)} then
                if round = NONE then
                    //default: round-to-nearest overflow handling
                    ai <- a.s || 1^{ebits(prec)} || 0^{fbits(prec)}
                else
                    raise FloatingPointArithmetic //Underflow
                endif
            else
                ai <- a.s || aien_{ebits(prec)-1..0} || aifr
            endif
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SNAN: 

            if round ≠ NONE then
                raise FloatingPointArithmetic //Invalid
            endif
            if -a.e < fbits(prec) then
                ai <- a.s || 1^{ebits(prec)} || a.f_{-a.e-1..0} || 0^{fbits(prec)+a.e}
            else
                lsb <- a.f_{-a.e-1-fbits(prec)+1..0} ≠ 0
                ai <- a.s || 1^{ebits(prec)} || a.f_{-a.e-1..-a.e-1-fbits(prec)+2} || lsb
            endif
        QNAN:
            if -a.e < fbits(prec) then
                ai <- a.s || 1^{ebits(prec)} || a.f_{-a.e-1..0} || 0^{fbits(prec)+a.e}
            else
                lsb <- a.f_{-a.e-1-fbits(prec)+1..0} ≠ 0
                ai <- a.s || 1^{ebits(prec)} || a.f_{-a.e-1..-a.e-1-fbits(prec)+2} || lsb
            endif
        ZERO:
            ai <- a.s || 0^{ebits(prec)} || 0^{fbits(prec)}
        INFINITY:
            ai <- a.s || 1^{ebits(prec)} || 0^{fbits(prec)}
    endcase
enddef

def ai <- fsinkr(prec, a, round) as
    case a.t of
        NORM:
            msb <- findmsb(a.f)
            rb <- -a.e
            if rb < 0 then
                aifr <- a.f_{msb..0} || 0^{-rb}
                aims <- msb - rb
            else
                case round of
                    C, C.D:
                        s <- 0^{msb-rb} || (~ai.s)^{rb}
                    F, F.D:
                        s <- 0^{msb-rb} || (ai.s)^{rb}
                    N, NONE:
                        s <- 0^{msb-rb} || ~ai.f_{rb} || ai.f_{rb}^{rb-1}
                    X:
                        if ai.f_{rb-1..0} ≠ 0 then
                            raise FloatingPointArithmetic // Inexact
                        endif
                        s <- 0
                    Z, Z.D:
                        s <- 0
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- endcase 

                v <- (0 || a.f_{msb..0}) + (0 || s)
                if v_{msb} = 1 then
                    aims <- msb + 1 - rb
                else
                    aims <- msb - rb
                endif
                aifr <- v_{aims..rb}
            endif
            if aims > prec then
                case round of
                    C.D, F.D, NONE, Z.D:
                        ai <- a.s || (~a.s)^{prec-1}
                    C, F, N, X, Z:
                        raise FloatingPointArithmetic // Overflow
                endcase
            elseif a.s = 0 then
                ai <- aifr
            else
                ai <- -aifr
            endif
        ZERO:
            ai <- 0^{prec}
        SNAN, QNAN:
            case round of
                C.D, F.D, NONE, Z.D:
                    ai <- 0^{prec}
                C, F, N, X, Z:
                    raise FloatingPointArithmetic // Invalid
            endcase
        INFINITY:
            case round of
                C.D, F.D, NONE, Z.D:
                    ai <- a.s || (~a.s)^{prec-1}
                C, F, N, X, Z:
                    raise FloatingPointArithmetic // Invalid
            endcase
    endcase
enddef



def c <- frecrest(a) as
    b.s <- 0
    b.t <- NORM
    b.e <- 0
    b.f <- 1
    c <- fest(fdiv(b,a))
enddef
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def c <— frsqrest(a) as 



    b.s <- 0
    b.t <- NORM
    b.e <- 0
    b.f <- 1
    c <- fest(fsqr(fdiv(b,a)))
enddef

def c <- fest(a) as
    if (a.t=NORM) then
        msb <- findmsb(a.f)
        a.e <- a.e + msb - 13
        a.f <- a.f_{msb..msb-12} || 1
    else
        c <- a
    endif
enddef

def c <- fsqr(a) as
    if (a.t=NORM) and (a.s=0) then
        c.s <- 0
        c.t <- NORM
        if (a.e_{0} = 1) then
            c.e <- (a.e-127) / 2
            c.f <- sqr(a.f || 0^{127})
        else
            c.e <- (a.e-128) / 2
            c.f <- sqr(a.f || 0^{128})
        endif
    elseif (a.t=SNAN) or (a.t=QNAN) or a.t=ZERO or ((a.t=INFINITY) and (a.s=0)) then
        c <- a
    elseif ((a.t=NORM) or (a.t=INFINITY)) and (a.s=1) then
        c <- DEFAULTSNAN // Invalid
    else
        assert FALSE // should have covered all the cases above
    endif
enddef
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Operation codes 



E.ADD.F.16


Ensemble add floating-point half


E.ADD.F.16.C


Ensemble add floating-point half ceiling


E.ADD.F.16.F


Ensemble add floating-point half floor


E.ADD.F.16.N


Ensemble add floating-point half nearest


E.ADD.F.16.X


Ensemble add floating-point half exact


E.ADD.F.16.Z


Ensemble add floating-point half zero


E.ADD.F.32


Ensemble add floating-point single


E.ADD.F.32.C


Ensemble add floating-point single ceiling


E.ADD.F.32.F


Ensemble add floating-point single floor


E.ADD.F.32.N


Ensemble add floating-point single nearest


E.ADD.F.32.X


Ensemble add floating-point single exact


E.ADD.F.32.Z


Ensemble add floating-point single zero


E.ADD.F.64


Ensemble add floating-point double


E.ADD.F.64.C


Ensemble add floating-point double ceiling


E.ADD.F.64.F


Ensemble add floating-point double floor


E.ADD.F.64.N


Ensemble add floating-point double nearest


E.ADD.F.64.X


Ensemble add floating-point double exact


E.ADD.F.64.Z


Ensemble add floating-point double zero


E.ADD.F.128


Ensemble add floating-point quad


E.ADD.F.128.C


Ensemble add floating-point quad ceiling


E.ADD.F.128.F


Ensemble add floating-point quad floor


E.ADD.F.128.N


Ensemble add floating-point quad nearest


E.ADD.F.128.X


Ensemble add floating-point quad exact


E.ADD.F.128.Z


Ensemble add floating-point quad zero


E.DIV.F.16


Ensemble divide floating-point half


E.DIV.F.16.C


Ensemble divide floating-point half ceiling


E.DIV.F.16.F


Ensemble divide floating-point half floor


E.DIV.F.16.N


Ensemble divide floating-point half nearest


E.DIV.F.16.X


Ensemble divide floating-point half exact


E.DIV.F.16.Z 


Ensemble divide floating-point half zero 


E.DIV.F.32 


Ensemble divide floating-point single 


E.DIV.F.32.C


Ensemble divide floating-point single ceiling 


E.DIV.F.32.F 


Ensemble divide floating-point single floor 


E.DIV.F.32.N 


Ensemble divide floating-point single nearest 


E.DIV.F.32.X 


Ensemble divide floating-point single exact 


E.DIV.F.32.Z 


Ensemble divide floating-point single zero 


E.DIV.F.64 


Ensemble divide floating-point double 
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Ensemble divide floating-point double ceiling 


E.DIV.F.64.F


Ensemble divide floating-point double floor 


E.DIV.F.64.N 


Ensemble divide floating-point double nearest 


E.DIV.F.64.X 


Ensemble divide floating-point double exact 


E.DIV.F.64.Z 


Ensemble divide floating-point double zero 


E.DIV.F.128 


Ensemble divide floating-point quad 


E.DIV.F.128.C 


Ensemble divide floating-point quad ceiling 


E.DIV.F.128.F 


Ensemble divide floating-point quad floor 


E.DIV.F.128.N


Ensemble divide floating-point quad nearest 


E.DIV.F.128.X


Ensemble divide floating-point quad exact 


E.DIV.F.128.Z 


Ensemble divide floating-point quad zero 


E.MUL.C.F.16 


Ensemble multiply complex floating-point half 


E.MUL.C.F.32


Ensemble multiply complex floating-point single 


E.MUL.C.F.64 


Ensemble multiply complex floating-point double 


E.MUL.F.16 


Ensemble multiply floating-point half 


E.MUL.F.16.C 


Ensemble multiply floating-point half ceiling 


E.MUL.F.16.F


Ensemble multiply floating-point half floor 


E.MUL.F.16.N 


Ensemble multiply floating-point half nearest 


E.MUL.F.16.X


Ensemble multiply floating-point half exact 


E.MUL.F.16.Z 


Ensemble multiply floating-point half zero 


E.MUL.F.32 


Ensemble multiply floating-point single 


E.MUL.F.32.C 


Ensemble multiply floating-point single ceiling 


E.MUL.F.32.F


Ensemble multiply floating-point single floor 


E.MUL.F.32.N 


Ensemble multiply floating-point single nearest 


E.MUL.F.32.X 


Ensemble multiply floating-point single exact 


E.MUL.F.32.Z 


Ensemble multiply floating-point single zero 


E.MUL.F.64 


Ensemble multiply floating-point double 


E.MUL.F.64.C 


Ensemble multiply floating-point double ceiling 


E.MUL.F.64.F 


Ensemble multiply floating-point double floor 


E.MUL.F.64.N 


Ensemble multiply floating-point double nearest 


E.MUL.F.64.X 


Ensemble multiply floating-point double exact 




E.MUL.F.64.Z


Ensemble multiply floating-point double zero


E.MUL.F.128 


Ensemble multiply floating-point quad 


E.MUL.F.128.C 


Ensemble multiply floating-point quad ceiling 


E.MUL.F.128.F 


Ensemble multiply floating-point quad floor 


E.MUL.F.128.N 


Ensemble multiply floating-point quad nearest 


E.MUL.F.128.X 


Ensemble multiply floating-point quad exact 


E.MUL.F.128.Z 


Ensemble multiply floating-point quad zero 
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JUL 10*"° g 



Selection 



class 


op 


prec 


round/trap 


add 


EADDF 


16 


32 


64 


128 


NONE C F N X Z 


divide 


EDIVF 


16 


32 


64 


128 


NONE C F N X Z 


multiply 


EMULF 


16 


32 


64 


128 


NONE C F N X Z 


complex multiply 


EMUL.CF 


16 


32 


64 




NONE 



Format 

E.op.prec.round rd=rc,rb 

rd=eopprecround(rc,rb) 

bits:   31..24 | 23..18 | 17..12 | 11..6 |   5..0
field:  E.prec |   rd   |   rc   |  rb   | op.round
width:     8   |    6   |    6   |   6   |    6
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Definition 

def mul(size,v,i,w,j) as
    mul <- fmul(F(size,v_{size-1+i..i}), F(size,w_{size-1+j..j}))
enddef

def EnsembleFloatingPoint(op,prec,round,ra,rb,rc) as
    c <- RegRead(rc, 128)
    b <- RegRead(rb, 128)
    for i <- 0 to 128-prec by prec
        ci <- F(prec,c_{i+prec-1..i})
        bi <- F(prec,b_{i+prec-1..i})
        case op of
            E.ADD.F:
                ai <- faddr(ci,bi,round)
            E.MUL.F:
                ai <- fmul(ci,bi)
            E.MUL.C.F:
                if (i and prec) then
                    ai <- fadd(mul(prec,c,i,b,i-prec), mul(prec,c,i-prec,b,i))
                else
                    ai <- fsub(mul(prec,c,i,b,i), mul(prec,c,i+prec,b,i+prec))
                endif
            E.DIV.F:
                ai <- fdiv(ci,bi)
        endcase
        a_{i+prec-1..i} <- PackF(prec, ai, round)
    endfor
    RegWrite(rd, 128, a)
enddef
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Operation codes 



E.SUB.F.16


Ensemble subtract floating-point half


E.SUB.F.16.C


Ensemble subtract floating-point half ceiling


E.SUB.F.16.F


Ensemble subtract floating-point half floor


E.SUB.F.16.N


Ensemble subtract floating-point half nearest


E.SUB.F.16.Z


Ensemble subtract floating-point half zero


E.SUB.F.16.X


Ensemble subtract floating-point half exact


E.SUB.F.32


Ensemble subtract floating-point single


E.SUB.F.32.C


Ensemble subtract floating-point single ceiling


E.SUB.F.32.F


Ensemble subtract floating-point single floor


E.SUB.F.32.N 


Ensemble subtract floating-point single nearest 


E.SUB.F.32.Z 


Ensemble subtract floating-point single zero 


E.SUB.F.32.X 


Ensemble subtract floating-point single exact 


E.SUB.F.64 


Ensemble subtract floating-point double 


E.SUB.F.64.C 


Ensemble subtract floating-point double ceiling 


E.SUB.F.64.F 


Ensemble subtract floating-point double floor 


E.SUB.F.64.N 


Ensemble subtract floating-point double nearest 


E.SUB.F.64.Z 


Ensemble subtract floating-point double zero 


E.SUB.F.64.X 


Ensemble subtract floating-point double exact 


E.SUB.F.128 


Ensemble subtract floating-point quad 


E.SUB.F.128.C 


Ensemble subtract floating-point quad ceiling 


E.SUB.F.128.F 


Ensemble subtract floating-point quad floor 


E.SUB.F.128.N 


Ensemble subtract floating-point quad nearest 


E.SUB.F.128.Z 


Ensemble subtract floating-point quad zero 


E.SUB.F.128.X


Ensemble subtract floating-point quad exact 
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Selection 



class 


op 


prec 


round/trap 


set 


SET. 

E LG 
L GE 


16 32 64 128 


NONE X 


subtract 


SUB 


16 32 64 128 


NONE C F N X Z 



Format 



E.op.prec.round rd=rb,rc

rd=eopprecround(rb,rc)

bits:   31..24 | 23..18 | 17..12 | 11..6 |   5..0
field:  E.prec |   rd   |   rc   |  rb   | op.round
width:     8   |    6   |    6   |   6   |    6
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Definition 



def EnsembleReversedFloatingPoint(op,prec,round,rd,rc,rb) as
    c <- RegRead(rc, 128)
    b <- RegRead(rb, 128)
    for i <- 0 to 128-prec by prec
        ci <- F(prec,c_{i+prec-1..i})
        bi <- F(prec,b_{i+prec-1..i})
        ai <- frsubr(ci,-bi, round)
        a_{i+prec-1..i} <- PackF(prec, ai, round)
    endfor
    RegWrite(rd, 128, a)
enddef
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Operation codes 



X.COMPRESS.2 


Crossbar compress signed pecks 


X.COMPRESS.4 


Crossbar compress signed nibbles 


X.COMPRESS.8


Crossbar compress signed bytes


X.COMPRESS.16


Crossbar compress signed doublets


X.COMPRESS.32


Crossbar compress signed quadlets


X.COMPRESS.64


Crossbar compress signed octlets 


X.COMPRESS.128 


Crossbar compress signed hexlet 


X.COMPRESS.U.2 


Crossbar compress unsigned pecks 


X.COMPRESS.U.4 


Crossbar compress unsigned nibbles 


X.COMPRESS.U.8 


Crossbar compress unsigned bytes 


X.COMPRESS.U.16 


Crossbar compress unsigned doublets 


X.COMPRESS.U.32 


Crossbar compress unsigned quadlets 


X.COMPRESS.U.64 


Crossbar compress unsigned octlets 


X.COMPRESS.U.128


Crossbar compress unsigned hexlet 


X.EXPAND.2


Crossbar expand signed pecks


X.EXPAND.4


Crossbar expand signed nibbles


X.EXPAND.8


Crossbar expand signed bytes


X.EXPAND.16


Crossbar expand signed doublets


X.EXPAND.32


Crossbar expand signed quadlets


X.EXPAND.64


Crossbar expand signed octlets


X.EXPAND.128


Crossbar expand signed hexlet


X.EXPAND.U.2


Crossbar expand unsigned pecks


X.EXPAND.U.4


Crossbar expand unsigned nibbles


X.EXPAND.U.8


Crossbar expand unsigned bytes


X.EXPAND.U.16


Crossbar expand unsigned doublets


X.EXPAND.U.32


Crossbar expand unsigned quadlets


X.EXPAND.U.64


Crossbar expand unsigned octlets


X.EXPAND.U.128


Crossbar expand unsigned hexlet 


X.ROTL.2 


Crossbar rotate left pecks 


X.ROTL.4 


Crossbar rotate left nibbles 


X.ROTL.8 


Crossbar rotate left bytes 


X.ROTL.16


Crossbar rotate left doublets


X.ROTL.32


Crossbar rotate left quadlets


X.ROTL.64


Crossbar rotate left octlets


X.ROTL.128


Crossbar rotate left hexlet 


X.ROTR.2 


Crossbar rotate right pecks 


X.ROTR.4 


Crossbar rotate right nibbles 


X.ROTR.8 


Crossbar rotate right bytes 


X.ROTR.16 


Crossbar rotate right doublets 
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X.ROTR32 


Crossbar rotate right quadlets 


X.ROTR.64 


Crossbar rotate right octlets 


X.ROTR.128


Crossbar rotate right hexlet 


X.SHL.2 


Crossbar shift left pecks 


X.SHL.2.O


Crossbar shift left signed pecks check overflow 


X.SHL.4 


Crossbar shift left nibbles 


X.SHL.4.O


Crossbar shift left signed nibbles check overflow 


X.SHL.8 


Crossbar shift left bytes 


X.SHL.8.O


Crossbar shift left signed bytes check overflow 


X.SHL.16 


Crossbar shift left doublets 


X.SHL.16.O


Crossbar shift left signed doublets check overflow 


X.SHL.32 


Crossbar shift left quadlets 


X.SHL.32.O


Crossbar shift left signed quadlets check overflow 


X.SHL.64 


Crossbar shift left octlets 


X.SHL.64.O


Crossbar shift left signed octlets check overflow 


X.SHL.128 


Crossbar shift left hexlet 


X.SHL.128.O


Crossbar shift left signed hexlet check overflow 


X.SHL.U.2.O


Crossbar shift left unsigned pecks check overflow 


X.SHL.U.4.O


Crossbar shift left unsigned nibbles check overflow 


X.SHL.U.8.O


Crossbar shift left unsigned bytes check overflow 


X.SHL.U.16.O


Crossbar shift left unsigned doublets check overflow 


X.SHL.U.32.O


Crossbar shift left unsigned quadlets check overflow 


X.SHL.U.64.O


Crossbar shift left unsigned octlets check overflow 


X.SHL.U.128.O


Crossbar shift left unsigned hexlet check overflow 


X.SHR.2 


Crossbar signed shift right pecks 


X.SHR.4 


Crossbar signed shift right nibbles 


X.SHR.8 


Crossbar signed shift right bytes 


X.SHR.16 


Crossbar signed shift right doublets 


X.SHR.32 


Crossbar signed shift right quadlets 


X.SHR.64 


Crossbar signed shift right octlets 




X.SHR.128


Crossbar signed shift right hexlet


X.SHR.U.2 


Crossbar shift right unsigned pecks 


X.SHR.U.4 


Crossbar shift right unsigned nibbles 


X.SHR.U.8 


Crossbar shift right unsigned bytes 


X.SHR.U.16 


Crossbar shift right unsigned doublets 


X.SHR.U.32 


Crossbar shift right unsigned quadlets 


X.SHR.U.64 


Crossbar shift right unsigned octlets 


X.SHR.U.128 


Crossbar shift right unsigned hexlet 



i 
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Selection 



class 


op 


size 


precision 


EXPAND EXPAND.U
COMPRESS COMPRESS.U


2 4 8 16 32 64 128 


shift 


ROTR ROTL SHR SHL 
SHL.O SHL.U.O SHR.U 


2 4 8 16 32 64 128 



Format 

X.op.size rd=rc,rb
rd=xopsize(rc,rb)

bits:   31..25  | 24 | 23..18 | 17..12 | 11..6 | 5..2 | 1..0
field:  X.SHIFT |  s |   rd   |   rc   |  rb   |  op  |  sz
width:     7    |  1 |    6   |    6   |   6   |   4  |   2

lsize <- log(size)
s <- lsize_{2}
sz <- lsize_{1..0}
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Definition 



def Crossbar(op,size,rd,rc,rb)
    c <- RegRead(rc, 128)
    b <- RegRead(rb, 128)
    shift <- b and (size-1)
    case op_{5..2} || 0^{2} of
        X.COMPRESS:
            hsize <- size/2
            for i <- 0 to 64-hsize by hsize
                if shift < hsize then
                    a_{i+hsize-1..i} <- c_{i+i+shift+hsize-1..i+i+shift}
                else
                    a_{i+hsize-1..i} <- c_{i+i+size-1}^{shift-hsize} || c_{i+i+size-1..i+i+shift}
                endif
            endfor
            a_{127..64} <- 0
        X.COMPRESS.U:
            hsize <- size/2
            for i <- 0 to 64-hsize by hsize
                if shift < hsize then
                    a_{i+hsize-1..i} <- c_{i+i+shift+hsize-1..i+i+shift}
                else
                    a_{i+hsize-1..i} <- 0^{shift-hsize} || c_{i+i+size-1..i+i+shift}
                endif
            endfor
            a_{127..64} <- 0
        X.EXPAND:
            hsize <- size/2
            for i <- 0 to 64-hsize by hsize
                if shift < hsize then
                    a_{i+i+size-1..i+i} <- c_{i+hsize-1}^{hsize-shift} || c_{i+hsize-1..i} || 0^{shift}
                else
                    a_{i+i+size-1..i+i} <- c_{i+size-shift-1..i} || 0^{shift}
                endif
            endfor
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X.EXP AND.U: 

            hsize <- size/2
            for i <- 0 to 64-hsize by hsize
                if shift < hsize then
                    a_{i+i+size-1..i+i} <- 0^{hsize-shift} || c_{i+hsize-1..i} || 0^{shift}
                else
                    a_{i+i+size-1..i+i} <- c_{i+size-shift-1..i} || 0^{shift}
                endif
            endfor
        X.ROTL:
            for i <- 0 to 128-size by size
                a_{i+size-1..i} <- c_{i+size-1-shift..i} || c_{i+size-1..i+size-1-shift}
            endfor
        X.ROTR:
            for i <- 0 to 128-size by size
                a_{i+size-1..i} <- c_{i+shift-1..i} || c_{i+size-1..i+shift}
            endfor
        X.SHL:
            for i <- 0 to 128-size by size
                a_{i+size-1..i} <- c_{i+size-1-shift..i} || 0^{shift}
            endfor
        X.SHL.O:
            for i <- 0 to 128-size by size
                if c_{i+size-1..i+size-1-shift} ≠ c_{i+size-1-shift}^{shift+1} then
                    raise FixedPointArithmetic
                endif
                a_{i+size-1..i} <- c_{i+size-1-shift..i} || 0^{shift}
            endfor
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X.SHL.U.O: 

            for i <- 0 to 128-size by size
                if c_{i+size-1..i+size-shift} ≠ 0^{shift} then
                    raise FixedPointArithmetic
                endif
                a_{i+size-1..i} <- c_{i+size-1-shift..i} || 0^{shift}
            endfor
        X.SHR:
            for i <- 0 to 128-size by size
                a_{i+size-1..i} <- c_{i+size-1}^{shift} || c_{i+size-1..i+shift}
            endfor
        X.SHR.U:
            for i <- 0 to 128-size by size
                a_{i+size-1..i} <- 0^{shift} || c_{i+size-1..i+shift}
            endfor
    endcase
    RegWrite(rd, 128, a)
enddef



FIG. 32C -3 



Compress 32 bits to 16, with 4-bit right shift 



FIG. 32D 




Format 

X.EXTRACT ra=rd,rc,rb

ra=xextract(rd,rc,rb)

bits:   31..24 | 23..18 | 17..12 | 11..6 | 5..0
field:    op   |   rd   |   rc   |  rb   |  ra
width:     8   |    6   |    6   |   6   |   6



FIG. 33A 



Definition 



def CrossbarExtract(op,ra,rb,rc,rd) as
    d <- RegRead(rd, 128)
    c <- RegRead(rc, 128)
    b <- RegRead(rb, 128)
    case b_{8..0} of
        0..255:
            gsize <- 128
        256..383:
            gsize <- 64
        384..447:
            gsize <- 32
        448..479:
            gsize <- 16
        480..495:
            gsize <- 8
        496..503:
            gsize <- 4
        504..507:
            gsize <- 2
        508..511:
            gsize <- 1
    endcase
    m <- b_{12}
    as <- signed <- b_{14}
    h <- (2-m)*gsize
    spos <- (b_{8..0}) and ((2-m)*gsize-1)
    dpos <- (0 || b_{23..16}) and (gsize-1)
    sfsize <- (0 || b_{31..24}) and (gsize-1)
    tfsize <- (sfsize = 0) or ((sfsize+dpos) > gsize) ? gsize-dpos : sfsize
    fsize <- (tfsize + spos > h) ? h - spos : tfsize
    for i <- 0 to 128-gsize by gsize
        case op of
            X.EXTRACT:
                if m then
                    p <- d_{gsize+i-1..i}
                else
                    p <- (d || c)_{2*(gsize+i)-1..2*i}
                endif
        endcase
        v <- (as & p_{h-1}) || p
        w <- (as & v_{spos+fsize-1})^{gsize-fsize-dpos} || v_{fsize-1+spos..spos} || 0^{dpos}
        if m then
            a_{size-1+i..i} <- c_{gsize-1+i..dpos+fsize+i} || w_{dpos+fsize-1..dpos} || c_{dpos-1+i..i}
        else
            a_{size-1+i..i} <- w
        endif
    endfor
    RegWrite(ra, 128, a)
enddef
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Crossbar merge extract 



FIG. 33D 



X.SHUFFLE.4 


Crossbar shuffle within pecks 


X.SHUFFLE.8


Crossbar shuffle within bytes


X.SHUFFLE.16


Crossbar shuffle within doublets


X.SHUFFLE.32


Crossbar shuffle within quadlets


X.SHUFFLE.64


Crossbar shuffle within octlets


X.SHUFFLE.128


Crossbar shuffle within hexlet


X.SHUFFLE.256


Crossbar shuffle within triclet 



FIG. 34A 



Format 

X.SHUFFLE.256 rd=rc,rb,v,w,h
X.SHUFFLE.size rd=rcb,v,w

rd=xshuffle256(rc,rb,v,w,h)
rd=xshufflesize(rcb,v,w)

bits:    31..24   | 23..18 | 17..12 | 11..6 | 5..0
field:  X.SHUFFLE |   rd   |   rc   |  rb   |  op
width:      8     |    6   |    6   |   6   |   6

rc <- rb <- rcb
x <- log2(size)
y <- log2(v)
z <- log2(w)

op <- ((x*x*x-3*x*x-4*x)/6-(z*z-z)/2+x*z+y) + (size=256)*(h*32-56)



FIG. 34B 




Definition 



def CrossbarShuffle(major,rd,rc,rb,op)
    c <- RegRead(rc, 128)
    b <- RegRead(rb, 128)
    if rc=rb then
        case op of
            0..55:
                for x <- 2 to 7; for y <- 0 to x-2; for z <- 1 to x-y-1
                    if op = ((x*x*x-3*x*x-4*x)/6-(z*z-z)/2+x*z+y) then
                        for i <- 0 to 127
                            a_{i} <- c_{(i_{6..x} || i_{y+z-1..y} || i_{x-1..y+z} || i_{y-1..0})}
                        end
                    endif
                endfor; endfor; endfor
            56..63:
                raise ReservedInstruction
        endcase
    elseif
        case op_{4..0} of
            0..27:
                cb <- c || b
                x <- 8
                h <- op_{5}
                for y <- 0 to x-2; for z <- 1 to x-y-1
                    if op_{4..0} = ((17*z-z*z)/2-8+y) then
                        for i <- h*128 to 127+h*128
                            a_{i-h*128} <- cb_{(i_{y+z-1..y} || i_{x-1..y+z} || i_{y-1..0})}
                        end
                    endif
                endfor; endfor
            28..31:
                raise ReservedInstruction
        endcase
    endif
    RegWrite(rd, 128, a)
enddef



FIG. 34C 
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